Commit 17f67913 authored by Duncan Coutts
Browse files

Add a source package index cache to speed up reading

e.g. about 3x faster for cabal info pkgname
parent 1afcb22f
......@@ -15,7 +15,9 @@ module Distribution.Client.IndexUtils (
getSourcePackages,
readPackageIndexFile,
parseRepoIndex,
parsePackageIndex,
readRepoIndex,
updateRepoIndexCache,
) where
import qualified Distribution.Client.Tar as Tar
......@@ -23,7 +25,7 @@ import Distribution.Client.Types
import Distribution.Package
( PackageId, PackageIdentifier(..), PackageName(..)
, Package(..), packageVersion
, Package(..), packageVersion, packageName
, Dependency(Dependency), InstalledPackageId(..) )
import Distribution.Client.PackageIndex (PackageIndex)
import qualified Distribution.Client.PackageIndex as PackageIndex
......@@ -44,31 +46,36 @@ import Distribution.ParseUtils
import Distribution.Version
( Version(Version), intersectVersionRanges )
import Distribution.Text
( simpleParse )
( display, simpleParse )
import Distribution.Verbosity
( Verbosity, lessVerbose )
import Distribution.Simple.Utils
( warn, info, fromUTF8, equating )
( die, warn, info, fromUTF8, equating )
import Data.Char (isAlphaNum)
import Data.Maybe (catMaybes, fromMaybe)
import Data.List (isPrefixOf, groupBy)
import Data.Monoid (Monoid(..))
import qualified Data.Map as Map
import Control.Monad (MonadPlus(mplus), when)
import Control.Monad (MonadPlus(mplus), when, unless, liftM)
import Control.Exception (evaluate)
import qualified Data.ByteString.Lazy as BS
import qualified Data.ByteString.Lazy.Char8 as BS.Char8
import qualified Data.ByteString.Char8 as BSS
import Data.ByteString.Lazy (ByteString)
import Distribution.Client.GZipUtils (maybeDecompress)
import System.FilePath ((</>), takeExtension, splitDirectories, normalise)
import System.FilePath.Posix as FilePath.Posix
( takeFileName )
import System.IO
import System.IO.Unsafe (unsafeInterleaveIO)
import System.IO.Error (isDoesNotExistError)
import System.Directory
( getModificationTime )
( getModificationTime, doesFileExist )
import System.Time
( getClockTime, diffClockTimes, normalizeTimeDiff, TimeDiff(tdDay) )
getInstalledPackages :: Verbosity -> Compiler
-> PackageDBStack -> ProgramConfiguration
-> IO (PackageIndex InstalledPackage)
......@@ -109,6 +116,11 @@ getInstalledPackages verbosity comp packageDbs conf =
brokenPackageId (InstalledPackageId str) =
PackageIdentifier (PackageName (str ++ "-broken")) (Version [] [])
------------------------------------------------------------------------
-- Reading the source package index
--
-- | Read a repository index from disk, from the local files specified by
-- a list of 'Repo's.
--
......@@ -138,6 +150,7 @@ getSourcePackages verbosity repos = do
packagePreferences = prefs'
}
-- | Read a repository index from disk, from the local file specified by
-- the 'Repo'.
--
......@@ -147,35 +160,23 @@ getSourcePackages verbosity repos = do
--
readRepoIndex :: Verbosity -> Repo
-> IO (PackageIndex SourcePackage, [Dependency])
readRepoIndex verbosity repo = handleNotFound $ do
readRepoIndex verbosity repo =
let indexFile = repoLocalDir repo </> "00-index.tar"
(pkgs, prefs) <- either fail return
. foldlTarball extract ([], [])
=<< BS.readFile indexFile
cacheFile = repoLocalDir repo </> "00-index.cache"
in handleNotFound $ do
warnIfIndexIsOld indexFile
whenCacheOutOfDate indexFile cacheFile $ do
info verbosity $ "Updating the index cache file..."
updatePackageIndexCacheFile indexFile cacheFile
readPackageIndexCacheFile mkAvailablePackage indexFile cacheFile
pkgIndex <- evaluate $ PackageIndex.fromList
[ SourcePackage {
where
mkAvailablePackage pkgid pkg =
SourcePackage {
packageInfoId = pkgid,
packageDescription = pkg,
packageSource = RepoTarballPackage repo pkgid Nothing
}
| (pkgid, pkg) <- pkgs]
warnIfIndexIsOld indexFile
return (pkgIndex, prefs)
where
extract (pkgs, prefs) entry = fromMaybe (pkgs, prefs) $
(do pkg <- extractPkg entry; return (pkg:pkgs, prefs))
`mplus` (do prefs' <- extractPrefs entry; return (pkgs, prefs'++prefs))
extractPrefs :: Tar.Entry -> Maybe [Dependency]
extractPrefs entry = case Tar.entryContent entry of
Tar.NormalFile content _
| takeFileName (Tar.entryPath entry) == "preferred-versions"
-> Just . parsePreferredVersions
. BS.Char8.unpack $ content
_ -> Nothing
handleNotFound action = catch action $ \e -> if isDoesNotExistError e
then do
......@@ -201,11 +202,32 @@ readRepoIndex verbosity repo = handleNotFound $ do
++ "'cabal update' to get the latest list of available packages."
Right _localRepo -> return ()
-- | Parse the text of a preferred-versions file. Lines beginning with
-- @--@ are skipped; every other line is parsed as a 'Dependency', and lines
-- that do not parse are silently dropped.
parsePreferredVersions :: String -> [Dependency]
parsePreferredVersions contents =
  [ dep
  | ln <- lines contents
  , not ("--" `isPrefixOf` ln)
  , Just dep <- [simpleParse ln]
  ]
-- | Refresh the index cache of a repository ahead of time.
--
-- Calling this is optional: the cache is also brought up to date whenever
-- the index is read normally. It merely lets the caller do the work earlier.
--
updateRepoIndexCache :: Verbosity -> Repo -> IO ()
updateRepoIndexCache verbosity repo =
  let localDir  = repoLocalDir repo
      indexFile = localDir </> "00-index.tar"
      cacheFile = localDir </> "00-index.cache"
      -- Only run when the cache is missing or older than the index.
      refresh   = do
        info verbosity "Updating the index cache file..."
        updatePackageIndexCacheFile indexFile cacheFile
   in whenCacheOutOfDate indexFile cacheFile refresh
-- | Run the given action only when the cache file is missing, or when its
-- modification time is older than that of the file it is derived from.
--
-- The modification times are only queried when the cache file exists.
whenCacheOutOfDate :: FilePath -> FilePath -> IO () -> IO ()
whenCacheOutOfDate origFile cacheFile action = do
  cachePresent <- doesFileExist cacheFile
  stale <- if cachePresent
             then do
               origTime  <- getModificationTime origFile
               cacheTime <- getModificationTime cacheFile
               return (cacheTime < origTime)
             else return True
  when stale action
------------------------------------------------------------------------
-- Reading the index file
--
-- | Read a compressed \"00-index.tar.gz\" file into a 'PackageIndex'.
--
......@@ -218,22 +240,55 @@ parsePreferredVersions = catMaybes
--
readPackageIndexFile :: Package pkg
                     => (PackageId -> GenericPackageDescription -> pkg)
                        -- ^ builds an index entry from a package id and
                        --   its parsed description
                     -> FilePath
                        -- ^ path of the index file to read
                     -> IO (PackageIndex pkg, [Dependency])
                        -- ^ the package index, and the preferred-version
                        --   constraints found in the index file
readPackageIndexFile mkPkg indexFile = do
  -- The file contents go through 'maybeDecompress' before being parsed.
  -- A parse error from 'parsePackageIndex' is raised in IO via 'fail'.
  (pkgs, prefs) <- either fail return
                 . parsePackageIndex
                 . maybeDecompress
               =<< BS.readFile indexFile
  -- The block number carried by each parsed entry is not needed here.
  pkgs' <- evaluate $ PackageIndex.fromList
             [ mkPkg pkgid pkg | (pkgid, pkg, _) <- pkgs ]
  return (pkgs', prefs)
-- | Parse an uncompressed \"00-index.tar\" repository index file represented
-- as a 'ByteString'.
--
parseRepoIndex :: ByteString
               -> Either String [(PackageId, GenericPackageDescription)]
parseRepoIndex = foldlTarball addPkg []
  where
    -- Keep the entries that 'extractPkg' recognises as packages and leave
    -- the accumulator untouched for everything else.
    addPkg pkgs entry = case extractPkg entry of
      Nothing  -> pkgs
      Just pkg -> pkg : pkgs
-- | Parse an index tarball, given as a lazy 'ByteString', into the packages
-- it contains and the preferred-version constraints it lists.
--
-- Each package is paired with the number of the 512-byte block at which its
-- tar entry starts. A tar format error is returned as 'Left'.
--
parsePackageIndex :: ByteString
                  -> Either String
                            ( [(PackageId, GenericPackageDescription, BlockNo)]
                            , [Dependency] )
parsePackageIndex = accum 0 [] [] . Tar.read
  where
    accum blockNo pkgs prefs es = case es of
      Tar.Fail err   -> Left err
      Tar.Done       -> Right (reverse pkgs, reverse prefs)
      -- Force the running block counter at every step. Left lazy, each
      -- counter would be an unevaluated sum that keeps the previous tar
      -- entry alive and grows into a long chain of thunks.
      Tar.Next e es' -> blockNo' `seq` accum blockNo' pkgs' prefs' es'
        where
          (pkgs', prefs') = extract blockNo pkgs prefs e
          blockNo'        = blockNo + sizeInBlocks e

    -- Try the entry as a package first, then as a preferred-versions file;
    -- entries that are neither leave the accumulators unchanged.
    extract blockNo pkgs prefs entry =
        fromMaybe (pkgs, prefs) $
                tryExtractPkg
        `mplus` tryExtractPrefs
      where
        tryExtractPkg = do
          (pkgid, pkg) <- extractPkg entry
          return ((pkgid, pkg, blockNo):pkgs, prefs)

        tryExtractPrefs = do
          prefs' <- extractPrefs entry
          return (pkgs, prefs'++prefs)

    -- One block for the entry header, plus the content size rounded up to
    -- whole 512-byte blocks for the entry types that carry a size.
    sizeInBlocks entry =
        1 + case Tar.entryContent entry of
              Tar.NormalFile     _   size -> bytesToBlocks size
              Tar.OtherEntryType _ _ size -> bytesToBlocks size
              _                           -> 0
      where
        -- 'div' rounds towards negative infinity, so a size of 0 gives 0.
        bytesToBlocks s = 1 + ((fromIntegral s - 1) `div` 512)
extractPkg :: Tar.Entry -> Maybe (PackageId, GenericPackageDescription)
extractPkg entry = case Tar.entryContent entry of
......@@ -256,10 +311,156 @@ extractPkg entry = case Tar.entryContent entry of
where
fileName = Tar.entryPath entry
-- | Left-fold a function over the entries of a tar archive given as a lazy
-- 'ByteString', starting from @z@.
--
-- The whole archive is checked first: if reading it fails, the error is
-- returned as 'Left' and the function is never applied. The entries are
-- collected by consing, so the fold visits them in reverse archive order.
--
foldlTarball :: (a -> Tar.Entry -> a) -> a
             -> ByteString -> Either String a
foldlTarball f z = either Left (Right . foldl f z) . check [] . Tar.read
  where
    -- Collect all entries, or stop at the first format error.
    check _  (Tar.Fail err)  = Left err
    check ok Tar.Done        = Right ok
    check ok (Tar.Next e es) = check (e:ok) es
-- | Extract the preferred-version constraints from a tar entry, if the
-- entry is a normal file whose file name is @preferred-versions@.
-- Any other entry yields 'Nothing'.
extractPrefs :: Tar.Entry -> Maybe [Dependency]
extractPrefs entry =
  case Tar.entryContent entry of
    Tar.NormalFile content _
      | isPrefsFile -> Just (parsePreferredVersions (BS.Char8.unpack content))
    _               -> Nothing
  where
    isPrefsFile = takeFileName (Tar.entryPath entry) == "preferred-versions"
-- | Parse the text of a preferred-versions file. Lines beginning with
-- @--@ are skipped; every other line is parsed as a 'Dependency', and lines
-- that do not parse are silently dropped.
parsePreferredVersions :: String -> [Dependency]
parsePreferredVersions contents =
  [ dep
  | ln <- lines contents
  , not ("--" `isPrefixOf` ln)
  , Just dep <- [simpleParse ln]
  ]
------------------------------------------------------------------------
-- Reading and updating the index cache
--
-- | Rebuild the index cache file from the index file.
--
-- The index is read and parsed in full, and the cache is written with all
-- preferred-version entries first, followed by one entry per package. A
-- parse error is raised in IO via 'fail'.
--
-- NOTE(review): the cache is written with plain 'writeFile', so an
-- interrupted write could leave a truncated cache on disk — consider an
-- atomic write.
updatePackageIndexCacheFile :: FilePath -> FilePath -> IO ()
updatePackageIndexCacheFile indexFile cacheFile = do
  contents      <- BS.readFile indexFile
  (pkgs, prefs) <- either fail return
                     (parsePackageIndex (maybeDecompress contents))
  writeFile cacheFile (showIndexCache (prefEntries prefs ++ pkgEntries pkgs))
  where
    prefEntries prefs = map CachePrefrence prefs
    pkgEntries  pkgs  =
      [ CachePackageId pkgid blockNo | (pkgid, _, blockNo) <- pkgs ]
-- | Build the package index from the cache file, using the index file only
-- as a backing store for package descriptions.
--
-- The index file is opened for reading and its handle is passed on to
-- 'packageIndexFromCache'; the handle is not closed here.
readPackageIndexCacheFile :: Package pkg
                          => (PackageId -> GenericPackageDescription -> pkg)
                          -> FilePath
                          -> FilePath
                          -> IO (PackageIndex pkg, [Dependency])
readPackageIndexCacheFile mkPkg indexFile cacheFile = do
  indexHnd   <- openFile indexFile ReadMode
  cacheBytes <- BSS.readFile cacheFile
  packageIndexFromCache mkPkg indexHnd (readIndexCache cacheBytes)
-- | Build a 'PackageIndex' and the list of preferred-version constraints
-- from the entries of an index cache.
--
-- The handle must refer to the index tarball that the cache was built from.
-- Package descriptions are not read up front: each one is read from the
-- tarball through the handle only when it is first demanded.
--
packageIndexFromCache :: Package pkg
                      => (PackageId -> GenericPackageDescription -> pkg)
                      -> Handle
                      -> [IndexCacheEntry]
                      -> IO (PackageIndex pkg, [Dependency])
packageIndexFromCache mkPkg hnd = accum mempty []
  where
    accum srcpkgs prefs [] = do
      -- Have to reverse entries, since in a tar file, later entries mask
      -- earlier ones, and PackageIndex.fromList does the same, but we
      -- accumulate the list of entries in reverse order, so need to reverse.
      pkgIndex <- evaluate $ PackageIndex.fromList (reverse srcpkgs)
      return (pkgIndex, prefs)

    accum srcpkgs prefs (CachePackageId pkgid blockno : entries) = do
      -- Given the cache entry, make a package index entry.
      -- The magic here is that we use lazy IO to read the .cabal file
      -- from the index tarball if it turns out that we need it.
      -- Most of the time we only need the package id.
      pkg <- unsafeInterleaveIO $
               getPackageDescription blockno
      let srcpkg = mkPkg pkgid pkg
      accum (srcpkg:srcpkgs) prefs entries

    accum srcpkgs prefs (CachePrefrence pref : entries) =
      accum srcpkgs (pref:prefs) entries

    -- Seek to the entry's header block, read the header to learn the
    -- content size, then read and parse exactly that many bytes.
    getPackageDescription blockno = do
      -- Widen to Integer before multiplying so the byte offset cannot
      -- overflow a fixed-width Int.
      hSeek hnd AbsoluteSeek (fromIntegral blockno * 512)
      header  <- BS.hGet hnd 512
      size    <- getEntrySize header
      content <- BS.hGet hnd (fromIntegral size)
      readPackageDescription content

    getEntrySize header =
      case Tar.read header of
        Tar.Next e _ ->
          case Tar.entryContent e of
            Tar.NormalFile _ size -> return size
            _                     -> interror "unexpected tar entry type"
        _ -> interror "could not read tar file entry"

    readPackageDescription content =
      case parsePackageDescription . fromUTF8 . BS.Char8.unpack $ content of
        ParseOk _ d -> return d
        _           -> interror "failed to parse .cabal file"

    -- Abort with an explanation. The specific message is kept as its own
    -- sentence, separated from the general advice that follows it.
    interror msg = die $ "internal error when reading package index: " ++ msg
                      ++ ". The package index or index cache is probably "
                      ++ "corrupt. Running cabal update might fix it."
------------------------------------------------------------------------
-- Index cache data structure
--
-- | Tar files are block structured with 512 byte blocks. Every header and file
-- content starts on a block boundary.
--
type BlockNo = Int
-- | One entry of the index cache.
--
-- * 'CachePackageId' pairs a package id with the number of the block in the
--   index tarball at which that package's tar entry starts.
--
-- * 'CachePrefrence' holds a single preferred-version constraint.
--
data IndexCacheEntry = CachePackageId PackageId BlockNo
| CachePrefrence Dependency
deriving (Eq, Show)
-- | Parse one line of the index cache file.
--
-- Two line formats are recognised:
--
-- > pkg: <name> <version> b# <block number>
-- > pref-ver: <dependency>
--
-- Any other line yields 'Nothing', as does a line whose fields do not
-- parse or that has a negative version component or block number.
--
readIndexCacheEntry :: BSS.ByteString -> Maybe IndexCacheEntry
-- Written as a lambda so that the key constants in the where clause are
-- bound outside it rather than per call.
readIndexCacheEntry = \line ->
  case BSS.words line of
    [key, pkgnamestr, pkgverstr, sep, blocknostr]
      | key == packageKey && sep == blocknoKey ->
      case (parseName pkgnamestr, parseVer pkgverstr [], parseBlockNo blocknostr) of
        (Just pkgname, Just pkgver, Just blockno)
          -> Just (CachePackageId (PackageIdentifier pkgname pkgver) blockno)
        _ -> Nothing
    (key: remainder) | key == preferredVersionKey ->
      fmap CachePrefrence (simpleParse (BSS.unpack (BSS.unwords remainder)))
    _  -> Nothing
  where
    packageKey          = BSS.pack "pkg:"
    blocknoKey          = BSS.pack "b#"
    preferredVersionKey = BSS.pack "pref-ver:"

    -- Package names may only contain alphanumeric characters and '-'.
    parseName str
      | BSS.all (\c -> isAlphaNum c || c == '-') str
                  = Just (PackageName (BSS.unpack str))
      | otherwise = Nothing

    -- Dot-separated non-negative integers, accumulated in reverse in @vs@.
    -- 'BSS.readInt' accepts a leading sign, so negatives are rejected here.
    parseVer str vs =
      case BSS.readInt str of
        Just (v, str') | v >= 0 -> case BSS.uncons str' of
          Just ('.', str'') -> parseVer str'' (v:vs)
          Just _            -> Nothing
          Nothing           -> Just (Version (reverse (v:vs)) [])
        _ -> Nothing

    -- A block number is a whole non-negative integer with nothing after it.
    parseBlockNo str =
      case BSS.readInt str of
        Just (blockno, remainder)
          | BSS.null remainder && blockno >= 0 -> Just blockno
        _                                      -> Nothing
-- | Render one cache entry as a single line of text, without a trailing
-- newline, in the format that 'readIndexCacheEntry' reads back.
showIndexCacheEntry :: IndexCacheEntry -> String
showIndexCacheEntry (CachePackageId pkgid b) =
  concat
    [ "pkg: ", display (packageName pkgid)
    , " ",     display (packageVersion pkgid)
    , " b# ",  show b
    ]
showIndexCacheEntry (CachePrefrence dep) =
  "pref-ver: " ++ display dep
-- | Parse the contents of an index cache file, one entry per line.
-- Lines that 'readIndexCacheEntry' cannot parse are skipped.
readIndexCache :: BSS.ByteString -> [IndexCacheEntry]
readIndexCache bytes =
  [ entry | Just entry <- map readIndexCacheEntry (BSS.lines bytes) ]
-- | Render a list of cache entries as text, one newline-terminated line
-- per entry.
showIndexCache :: [IndexCacheEntry] -> String
showIndexCache entries =
  concatMap (\entry -> showIndexCacheEntry entry ++ "\n") entries
......@@ -20,7 +20,7 @@ import Distribution.Client.FetchUtils
( downloadIndex )
import qualified Distribution.Client.PackageIndex as PackageIndex
import Distribution.Client.IndexUtils
( getSourcePackages )
( getSourcePackages, updateRepoIndexCache )
import qualified Paths_cabal_install
( version )
......@@ -60,6 +60,7 @@ updateRepo verbosity repo = case repoKind repo of
writeFileAtomic (dropExtension indexPath) . BS.Char8.unpack
. maybeDecompress
=<< BS.readFile indexPath
updateRepoIndexCache verbosity repo
checkForSelfUpgrade :: Verbosity -> [Repo] -> IO ()
checkForSelfUpgrade verbosity repos = do
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment