Commit 0e0d87da authored by wz1000's avatar wz1000 Committed by Matthew Pickering

Fix and enforce validation of header for .hie files

Implements #16686

The files version is automatically generated from the current GHC
version in the same manner as normal interface files.

This means that clients can first read the version and then decide how
to read the rest of the file.
parent 495a65cb
{-
Main functions for .hie file generation
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE UndecidableInstances #-}
......@@ -20,7 +23,6 @@ import BooleanFormula
import Class ( FunDep )
import CoreUtils ( exprType )
import ConLike ( conLikeName )
import Config ( cProjectVersion )
import Desugar ( deSugarExpr )
import FieldLabel
import HsSyn
......@@ -42,7 +44,6 @@ import HieUtils
import qualified Data.Array as A
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Data ( Data, Typeable )
......@@ -98,9 +99,7 @@ mkHieFile ms ts rs = do
let Just src_file = ml_hs_file $ ms_location ms
src <- liftIO $ BS.readFile src_file
return $ HieFile
{ hie_version = curHieVersion
, hie_ghc_version = BSC.pack cProjectVersion
, hie_hs_file = src_file
{ hie_hs_file = src_file
, hie_module = ms_mod ms
, hie_types = arr
, hie_asts = asts'
......
{-
Binary serialization for .hie files.
-}
{-# LANGUAGE ScopedTypeVariables #-}
module HieBin ( readHieFile, writeHieFile, HieName(..), toHieName ) where
module HieBin ( readHieFile, readHieFileWithVersion, HieHeader, writeHieFile, HieName(..), toHieName, HieFileResult(..), hieMagic) where
import Config ( cProjectVersion )
import GhcPrelude
import Binary
import BinIface ( getDictFastString )
import FastMutInt
......@@ -14,17 +17,23 @@ import Outputable
import PrelInfo
import SrcLoc
import UniqSupply ( takeUniqFromSupply )
import Util ( maybeRead )
import Unique
import UniqFM
import qualified Data.Array as A
import Data.IORef
import Data.ByteString ( ByteString )
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import Data.List ( mapAccumR )
import Data.Word ( Word32 )
import Control.Monad ( replicateM )
import Data.Word ( Word8, Word32 )
import Control.Monad ( replicateM, when )
import System.Directory ( createDirectoryIfMissing )
import System.FilePath ( takeDirectory )
import HieTypes
-- | `Name`'s get converted into `HieName`'s before being written into @.hie@
-- files. See 'toHieName' and 'fromHieName' for logic on how to convert between
-- these two types.
......@@ -63,10 +72,33 @@ data HieDictionary = HieDictionary
initBinMemSize :: Int
initBinMemSize = 1024*1024
writeHieFile :: Binary a => FilePath -> a -> IO ()
-- | The header for HIE files - Capital ASCII letters "HIE".
hieMagic :: [Word8]
hieMagic = [72,73,69]
hieMagicLen :: Int
hieMagicLen = length hieMagic
ghcVersion :: ByteString
ghcVersion = BSC.pack cProjectVersion
putBinLine :: BinHandle -> ByteString -> IO ()
putBinLine bh xs = do
mapM_ (putByte bh) $ BS.unpack xs
putByte bh 10 -- newline char
-- | Write a `HieFile` to the given `FilePath`, with a proper header and
-- symbol tables for `Name`s and `FastString`s
writeHieFile :: FilePath -> HieFile -> IO ()
writeHieFile hie_file_path hiefile = do
bh0 <- openBinMem initBinMemSize
-- Write the header: hieHeader followed by the
-- hieVersion and the GHC version used to generate this file
mapM_ (putByte bh0) hieMagic
putBinLine bh0 $ BSC.pack $ show hieVersion
putBinLine bh0 $ ghcVersion
-- remember where the dictionary pointer will go
dict_p_p <- tellBin bh0
put_ bh0 dict_p_p
......@@ -105,7 +137,7 @@ writeHieFile hie_file_path hiefile = do
symtab_map' <- readIORef symtab_map
putSymbolTable bh symtab_next' symtab_map'
-- write the dictionary pointer at the fornt of the file
-- write the dictionary pointer at the front of the file
dict_p <- tellBin bh
putAt bh dict_p_p dict_p
seekBin bh dict_p
......@@ -120,10 +152,87 @@ writeHieFile hie_file_path hiefile = do
writeBinMem bh hie_file_path
return ()
readHieFile :: Binary a => NameCache -> FilePath -> IO (a, NameCache)
data HieFileResult
= HieFileResult
{ hie_file_result_version :: Integer
, hie_file_result_ghc_version :: ByteString
, hie_file_result :: HieFile
}
type HieHeader = (Integer, ByteString)
-- | Read a `HieFile` from a `FilePath`. Can use
-- an existing `NameCache`. Allows you to specify
-- which versions of hieFile to attempt to read.
-- `Left` case returns the failing header versions.
readHieFileWithVersion :: (HieHeader -> Bool) -> NameCache -> FilePath -> IO (Either HieHeader (HieFileResult, NameCache))
readHieFileWithVersion readVersion nc file = do
bh0 <- readBinMem file
(hieVersion, ghcVersion) <- readHieFileHeader file bh0
if readVersion (hieVersion, ghcVersion)
then do
(hieFile, nc') <- readHieFileContents bh0 nc
return $ Right (HieFileResult hieVersion ghcVersion hieFile, nc')
else return $ Left (hieVersion, ghcVersion)
-- | Read a `HieFile` from a `FilePath`. Can use
-- an existing `NameCache`.
readHieFile :: NameCache -> FilePath -> IO (HieFileResult, NameCache)
readHieFile nc file = do
bh0 <- readBinMem file
(readHieVersion, ghcVersion) <- readHieFileHeader file bh0
-- Check if the versions match
when (readHieVersion /= hieVersion) $
panic $ unwords ["readHieFile: hie file versions don't match for file:"
, file
, "Expected"
, show hieVersion
, "but got", show readHieVersion
]
(hieFile, nc') <- readHieFileContents bh0 nc
return $ (HieFileResult hieVersion ghcVersion hieFile, nc')
readBinLine :: BinHandle -> IO ByteString
readBinLine bh = BS.pack . reverse <$> loop []
where
loop acc = do
char <- get bh :: IO Word8
if char == 10 -- ASCII newline '\n'
then return acc
else loop (char : acc)
readHieFileHeader :: FilePath -> BinHandle -> IO HieHeader
readHieFileHeader file bh0 = do
-- Read the header
magic <- replicateM hieMagicLen (get bh0)
version <- BSC.unpack <$> readBinLine bh0
case maybeRead version of
Nothing ->
panic $ unwords ["readHieFileHeader: hieVersion isn't an Integer:"
, show version
]
Just readHieVersion -> do
ghcVersion <- readBinLine bh0
-- Check if the header is valid
when (magic /= hieMagic) $
panic $ unwords ["readHieFileHeader: headers don't match for file:"
, file
, "Expected"
, show hieMagic
, "but got", show magic
]
return (readHieVersion, ghcVersion)
readHieFileContents :: BinHandle -> NameCache -> IO (HieFile, NameCache)
readHieFileContents bh0 nc = do
dict <- get_dictionary bh0
-- read the symbol table so we are capable of reading the actual data
......
{-
Functions to validate and check .hie file ASTs generated by GHC.
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
......
{-
Types for the .hie file format are defined here.
For more information see https://gitlab.haskell.org/ghc/ghc/wikis/hie-files
-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE TypeSynonymInstances #-}
......@@ -7,6 +12,7 @@ module HieTypes where
import GhcPrelude
import Config
import Binary
import FastString ( FastString )
import IfaceType
......@@ -28,8 +34,8 @@ import Control.Applicative ( (<|>) )
type Span = RealSrcSpan
-- | Current version of @.hie@ files
curHieVersion :: Word8
curHieVersion = 0
hieVersion :: Integer
hieVersion = read (cProjectVersionInt ++ cProjectPatchLevel) :: Integer
{- |
GHC builds up a wealth of information about Haskell source as it compiles it.
......@@ -48,13 +54,7 @@ Besides saving compilation cycles, @.hie@ files also offer a more stable
interface than the GHC API.
-}
data HieFile = HieFile
{ hie_version :: Word8
-- ^ version of the HIE format
, hie_ghc_version :: ByteString
-- ^ Version of GHC that produced this file
, hie_hs_file :: FilePath
{ hie_hs_file :: FilePath
-- ^ Initial Haskell source file path
, hie_module :: Module
......@@ -74,11 +74,8 @@ data HieFile = HieFile
, hie_hs_src :: ByteString
-- ^ Raw bytes of the initial Haskell source
}
instance Binary HieFile where
put_ bh hf = do
put_ bh $ hie_version hf
put_ bh $ hie_ghc_version hf
put_ bh $ hie_hs_file hf
put_ bh $ hie_module hf
put_ bh $ hie_types hf
......@@ -93,8 +90,6 @@ instance Binary HieFile where
<*> get bh
<*> get bh
<*> get bh
<*> get bh
<*> get bh
{-
......
......@@ -174,7 +174,7 @@ import Data.Set (Set)
import HieAst ( mkHieFile )
import HieTypes ( getAsts, hie_asts )
import HieBin ( readHieFile, writeHieFile )
import HieBin ( readHieFile, writeHieFile , hie_file_result)
import HieDebug ( diffFile, validateScopes )
#include "HsVersions.h"
......@@ -434,7 +434,7 @@ extract_renamed_stuff mod_summary tc_result = do
-- Roundtrip testing
nc <- readIORef $ hsc_NC hs_env
(file', _) <- readHieFile nc out_file
case diffFile hieFile file' of
case diffFile hieFile (hie_file_result file') of
[] ->
putMsg dflags $ text "Got no roundtrip errors"
xs -> do
......
Subproject commit f01473ed28e7c2700ff8e87b00ab87a802c9edd9
Subproject commit 83bb9870a117f9426e6f6cff6fec3bb6e93a7c18
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment