Commit 8d7a1dcd authored by Duncan Coutts's avatar Duncan Coutts Committed by Edward Z. Yang
Browse files

Introduce new file format for the package database binary cache

The purpose of the new format is to make it possible for the compiler
to not depend on the Cabal library. The new cache file format contains
more or less the same information duplicated in two different sections
using different representations.

One section is basically the same as what the package db contains now,
a list of packages using the types defined in the Cabal library. This
section is read back by ghc-pkg, and used for things like ghc-pkg dump
which have to produce output using the Cabal InstalledPackageInfo text
representation.

The other section is a ghc-local type which contains a subset of the
information from the Cabal InstalledPackageInfo -- just the bits that
the compiler cares about.

The trick is that the compiler can read this second section without
needing to know the representation (or types) of the first part. The
ghc-pkg tool knows about both representations and writes both.

This patch introduces the new cache file format but does not yet use it
properly. More patches to follow. (As of this patch, the compiler reads
the part intended for ghc-pkg so it still depends on Cabal and the
ghc-local package type is not yet fully defined.)
parent ce29a260
......@@ -61,8 +61,9 @@ import Outputable
import Maybes
import System.Environment ( getEnv )
import GHC.PackageDb (readPackageDbForGhcPkg)
import Distribution.InstalledPackageInfo
import Distribution.InstalledPackageInfo.Binary
import Distribution.InstalledPackageInfo.Binary ()
import Distribution.Package hiding (depends, PackageKey, mkPackageKey)
import Distribution.ModuleExport
import FastString
......@@ -385,7 +386,8 @@ readPackageConfig dflags conf_file = do
if isdir
then do let filename = conf_file </> "package.cache"
debugTraceMsg dflags 2 (text "Using binary package database:" <+> text filename)
conf <- readBinPackageDB filename
conf <- readPackageDbForGhcPkg filename
-- TODO readPackageDbForGhc ^^ instead
return (map installedPackageInfoToPackageConfig conf)
else do
......
......@@ -14,10 +14,7 @@
-- Portability : portable
--
module Distribution.InstalledPackageInfo.Binary (
readBinPackageDB,
writeBinPackageDB
) where
module Distribution.InstalledPackageInfo.Binary () where
import Distribution.Version
import Distribution.Package hiding (depends)
......@@ -29,20 +26,6 @@ import Distribution.Text (display)
import Data.Binary as Bin
import Control.Exception as Exception
readBinPackageDB :: Binary m => FilePath -> IO [InstalledPackageInfo_ m]
readBinPackageDB file
= do xs <- Bin.decodeFile file
_ <- Exception.evaluate $ length xs
return xs
`catchUserError`
(\err -> error ("While parsing " ++ show file ++ ": " ++ err))
catchUserError :: IO a -> (String -> IO a) -> IO a
catchUserError io f = io `Exception.catch` \(ErrorCall err) -> f err
writeBinPackageDB :: Binary m => FilePath -> [InstalledPackageInfo_ m] -> IO ()
writeBinPackageDB file ipis = Bin.encodeFile file ipis
instance Binary m => Binary (InstalledPackageInfo_ m) where
put = putInstalledPackageInfo
get = getInstalledPackageInfo
......
{-# LANGUAGE CPP, RecordWildCards, NamedFieldPuns #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
#if __GLASGOW_HASKELL__ >= 701
{-# LANGUAGE Trustworthy #-}
#endif
-----------------------------------------------------------------------------
-- |
-- Module : GHC.PackageDb
-- Copyright : (c) The University of Glasgow 2009, Duncan Coutts 2014
--
-- Maintainer : ghc-devs@haskell.org
-- Portability : portable
--
-- This module provides the view of GHC's database of registered packages that
-- is shared between GHC the compiler\/library, and the ghc-pkg program. It
-- defines the database format that is shared between GHC and ghc-pkg.
--
-- The database format, and this library are constructed so that GHC does not
-- have to depend on the Cabal library. The ghc-pkg program acts as the
-- gateway between the external package format (which is defined by Cabal) and
-- the internal package format which is specialised just for GHC.
--
-- GHC the compiler only needs some of the information which is kept about
-- registerd packages, such as module names, various paths etc. On the other
-- hand ghc-pkg has to keep all the information from Cabal packages and be able
-- to regurgitate it for users and other tools.
--
-- The first trick is that we duplicate some of the information in the package
-- database. We essentially keep two versions of the datbase in one file, one
-- version used only by ghc-pkg which keeps the full information (using the
-- serialised form of the 'InstalledPackageInfo' type defined by the Cabal
-- library); and a second version written by ghc-pkg and read by GHC which has
-- just the subset of information that GHC needs.
--
-- The second trick is that this module only defines in detail the format of
-- the second version -- the bit GHC uses -- and the part managed by ghc-pkg
-- is kept in the file but here we treat it as an opaque blob of data. That way
-- this library avoids depending on Cabal.
--
module GHC.PackageDb (
GhcPackageInfo(..),
readPackageDbForGhc,
readPackageDbForGhcPkg,
writePackageDb
) where
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS.Char8
import qualified Data.ByteString.Lazy as BS.Lazy
import qualified Data.ByteString.Lazy.Internal as BS.Lazy (defaultChunkSize)
import Data.Binary as Bin
import Data.Binary.Put as Bin
import Data.Binary.Get as Bin
import Control.Exception as Exception
import Control.Monad (when)
import System.FilePath
import System.IO
import System.IO.Error
import GHC.IO.Exception (IOErrorType(InappropriateType))
import System.Directory
-- | This is a subset of Cabal's 'InstalledPackageInfo', with just the bits
-- that GHC is interested in.
--
data GhcPackageInfo = GhcPackageInfo {
--TODO
}
deriving (Eq, Show)
-- | Read the part of the package DB that GHC is interested in.
--
readPackageDbForGhc :: FilePath -> IO [GhcPackageInfo]
readPackageDbForGhc file =
decodeFromFile file getDbForGhc
where
getDbForGhc = do
_version <- getHeader
_ghcPartLen <- get :: Get Word32
ghcPart <- get :: Get [GhcPackageInfo]
-- the next part is for ghc-pkg, but we stop here.
return ghcPart
-- | Read the part of the package DB that ghc-pkg is interested in
--
readPackageDbForGhcPkg :: Binary pkgs => FilePath -> IO pkgs
readPackageDbForGhcPkg file =
decodeFromFile file getDbForGhcPkg
where
getDbForGhcPkg = do
_version <- getHeader
-- skip over the ghc part
ghcPartLen <- get :: Get Word32
_ghcPart <- skip (fromIntegral ghcPartLen)
-- the next part is for ghc-pkg
ghcPkgPart <- get
return ghcPkgPart
-- | Write the whole of the package DB, both parts.
--
writePackageDb :: Binary pkgs => FilePath -> [GhcPackageInfo] -> pkgs -> IO ()
writePackageDb file ghcPkgs ghcPkgPart =
writeFileAtomic file (runPut putDbForGhcPkg)
where
putDbForGhcPkg = do
putHeader
put ghcPartLen
putLazyByteString ghcPart
put ghcPkgPart
where
ghcPartLen :: Word32
ghcPartLen = fromIntegral (BS.Lazy.length ghcPart)
ghcPart = encode ghcPkgs
getHeader :: Get (Word32, Word32)
getHeader = do
magic <- getByteString (BS.length headerMagic)
when (magic /= headerMagic) $
fail "not a ghc-pkg db file, wrong file magic number"
majorVersion <- get :: Get Word32
-- The major version is for incompatible changes
minorVersion <- get :: Get Word32
-- The minor version is for compatible extensions
when (majorVersion /= 1) $
fail "unsupported ghc-pkg db format version"
-- If we ever support multiple major versions then we'll have to change
-- this code
-- The header can be extended without incrementing the major version,
-- we ignore fields we don't know about (currently all).
headerExtraLen <- get :: Get Word32
skip (fromIntegral headerExtraLen)
return (majorVersion, minorVersion)
putHeader :: Put
putHeader = do
putByteString headerMagic
put majorVersion
put minorVersion
put headerExtraLen
where
majorVersion = 1 :: Word32
minorVersion = 0 :: Word32
headerExtraLen = 0 :: Word32
headerMagic :: BS.ByteString
headerMagic = BS.Char8.pack "\0ghcpkg\0"
-- | Feed a 'Get' decoder with data chunks from a file.
--
decodeFromFile :: FilePath -> Get a -> IO a
decodeFromFile file decoder =
withBinaryFile file ReadMode $ \hnd ->
feed hnd (runGetIncremental decoder)
where
feed hnd (Partial k) = do chunk <- BS.hGet hnd BS.Lazy.defaultChunkSize
if BS.null chunk
then feed hnd (k Nothing)
else feed hnd (k (Just chunk))
feed _ (Done _ _ result) = return result
feed _ (Fail _ _ msg) = ioError err
where
err = mkIOError InappropriateType loc Nothing (Just file)
`ioeSetErrorString` msg
loc = "GHC.PackageDb.readPackageDb"
writeFileAtomic :: FilePath -> BS.Lazy.ByteString -> IO ()
writeFileAtomic targetPath content = do
let (targetDir, targetName) = splitFileName targetPath
Exception.bracketOnError
(openBinaryTempFileWithDefaultPermissions targetDir $ targetName <.> "tmp")
(\(tmpPath, handle) -> hClose handle >> removeFile tmpPath)
(\(tmpPath, handle) -> do
BS.Lazy.hPut handle content
hClose handle
#if mingw32_HOST_OS || mingw32_TARGET_OS
renameFile tmpPath targetPath
-- If the targetPath exists then renameFile will fail
`catchIO` \err -> do
exists <- doesFileExist targetPath
if exists
then do removeFile targetPath
-- Big fat hairy race condition
renameFile newFile targetPath
-- If the removeFile succeeds and the renameFile fails
-- then we've lost the atomic property.
else throwIOIO err
#else
renameFile tmpPath targetPath
#endif
)
instance Binary GhcPackageInfo where
put (GhcPackageInfo {-TODO-}) = do
return ()
get = do
return (GhcPackageInfo {-TODO-})
......@@ -3,7 +3,19 @@ version: 0.0.0.0
license: BSD3
maintainer: ghc-devs@haskell.org
bug-reports: glasgow-haskell-bugs@haskell.org
synopsis: A binary format for the package database
synopsis: The GHC compiler's view of the GHC package database format
description: This library is shared between GHC and ghc-pkg and is used by
GHC to read package databases.
.
It only deals with the subset of the package database that the
compiler cares about: modules paths etc and not package
metadata like description, authors etc. It is thus not a
library interface to ghc-pkg and is *not* suitable for
modifying GHC package databases.
.
The package database format and this library are constructed in
such a way that while ghc-pkg depends on Cabal, the GHC library
and program do not have to depend on Cabal.
cabal-version: >=1.10
build-type: Simple
......@@ -23,8 +35,10 @@ Library
exposed-modules:
Distribution.InstalledPackageInfo.Binary
GHC.PackageDb
build-depends: base >= 4 && < 5,
binary >= 0.5 && < 0.8,
binary >= 0.7 && < 0.8,
bytestring, directory, filepath,
Cabal >= 1.20 && < 1.22
......@@ -10,10 +10,11 @@
module Main (main) where
import Version ( version, targetOS, targetARCH )
import qualified GHC.PackageDb as GhcPkg
import Distribution.InstalledPackageInfo.Binary()
import qualified Distribution.Simple.PackageIndex as PackageIndex
import Distribution.ModuleName hiding (main)
import Distribution.InstalledPackageInfo
import Distribution.InstalledPackageInfo as Cabal
import Distribution.Compat.ReadP
import Distribution.ParseUtils
import Distribution.ModuleExport
......@@ -50,7 +51,6 @@ import GHC.IO.Exception (IOErrorType(InappropriateType))
import Data.List
import Control.Concurrent
import qualified Data.ByteString.Lazy as B
import qualified Data.Binary as Bin
import qualified Data.Binary.Get as Bin
......@@ -715,7 +715,7 @@ readParseDatabase verbosity mb_user_conf modify use_cache path
then do
when (verbosity > Normal) $
infoLn ("using cache: " ++ cache)
pkgs <- myReadBinPackageDB cache
pkgs <- GhcPkg.readPackageDbForGhcPkg cache
mkPackageDB pkgs
else do
when (verbosity >= Normal && not modify || verbosity > Normal) $ do
......@@ -740,18 +740,6 @@ readParseDatabase verbosity mb_user_conf modify use_cache path
packages = pkgs
}
-- read the package.cache file strictly, to work around a problem with
-- bytestring 0.9.0.x (fixed in 0.9.1.x) where the file wasn't closed
-- after it has been completely read, leading to a sharing violation
-- later.
myReadBinPackageDB :: FilePath -> IO [InstalledPackageInfo]
myReadBinPackageDB filepath = do
h <- openBinaryFile filepath ReadMode
sz <- hFileSize h
b <- B.hGet h (fromIntegral sz)
hClose h
return $ Bin.runGet Bin.get b
parseSingletonPackageConf :: Verbosity -> FilePath -> IO InstalledPackageInfo
parseSingletonPackageConf verbosity file = do
when (verbosity > Normal) $ infoLn ("reading package config: " ++ file)
......@@ -1016,9 +1004,16 @@ changeDBDir verbosity cmds db = do
updateDBCache :: Verbosity -> PackageDB -> IO ()
updateDBCache verbosity db = do
let filename = location db </> cachefilename
pkgsCabalFormat :: [InstalledPackageInfo]
pkgsCabalFormat = packages db
pkgsGhcCacheFormat :: [GhcPkg.GhcPackageInfo]
pkgsGhcCacheFormat = [] -- TODO: for the moment
when (verbosity > Normal) $
infoLn ("writing cache " ++ filename)
writeBinaryFileAtomic filename (packages db)
GhcPkg.writePackageDb filename pkgsGhcCacheFormat pkgsCabalFormat
`catchIO` \e ->
if isPermissionError e
then die (filename ++ ": you don't have permission to modify this file")
......@@ -1862,12 +1857,6 @@ catchError io handler = io `Exception.catch` handler'
tryIO :: IO a -> IO (Either Exception.IOException a)
tryIO = Exception.try
writeBinaryFileAtomic :: Bin.Binary a => FilePath -> a -> IO ()
writeBinaryFileAtomic targetFile obj =
withFileAtomic targetFile $ \h -> do
hSetBinaryMode h True
B.hPutStr h (Bin.encode obj)
writeFileUtf8Atomic :: FilePath -> String -> IO ()
writeFileUtf8Atomic targetFile content =
withFileAtomic targetFile $ \h -> do
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment