Commit db1ef505 authored by Herbert Valerio Riedel
Browse files

Refactor & optimise construction of index cache

This commit was motivated by @dcoutts' code-review comment:

> Originally, using `Sec.directoryEntries` gave us only the
> final version of each file, i.e. not all intermediate revisions. And
> previously our strategy was to go through the final versions of each
> file, in file order, and look up just the ones we're interested in (which
> in practice is 99% of them).
> Now for the new cache we want to go through all revisions, which means
> all entries in file order. So instead of using `Sec.directoryEntries`
> which reads from the tar index, we go straight for `Sec.directoryFirst`
> which is block 0 and iterate through, using `lazyUnfold`.
> But we can now significantly simplify this and do it more
> efficiently. Note that `indexLookupEntry` and `indexLookupFileEntry` are
> expensive operations that seek in the tar file and read the tar entry at
> that point. So let's do it exactly once per entry. The current code does
> it once in the `lazyUnfold indexLookupEntry` and then again in `mk`. But
> the old `mk` only did that because it had not previously looked up the
> entry.
parent 92c51628
......@@ -474,32 +474,29 @@ withIndexEntries :: Index -> ([IndexCacheEntry] -> IO a) -> IO a
-- Secure-repository case of 'withIndexEntries': opens the secure repository
-- and its index, then hands a list of 'IndexCacheEntry' values to @callback@.
--
-- NOTE(review): this span is a diff hunk rendered without +/- markers or
-- indentation. The 'mk' / 'mk2' / 'dirIdxEnts' code appears to be the version
-- removed by the commit, and the 'indexEntries' / 'toCacheEntries' code the
-- version added by it -- confirm against the repository before relying on it.
withIndexEntries (RepoIndex repoCtxt repo@RepoSecure{..}) callback =
repoContextWithSecureRepo repoCtxt repo $ \repoSecure ->
Sec.withIndex repoSecure $ \Sec.IndexCallbacks{..} -> do
-- (old) Converts one directory entry plus its parsed index path into cache
-- entries. The .cabal and preferred-versions cases call
-- 'indexLookupFileEntry' again to obtain the timestamp and content.
let mk :: (Sec.DirectoryEntry, fp, Maybe (Sec.Some Sec.IndexFile))
-> IO [IndexCacheEntry]
mk (_, _fp, Nothing) =
return [] -- skip unrecognized file
mk (_, _fp, Just (Sec.Some (Sec.IndexPkgMetadata _pkgId))) =
return [] -- skip metadata
mk (dirEntry, _fp, Just (Sec.Some file@(Sec.IndexPkgCabal pkgId))) = do
let blockNo = Sec.directoryEntryBlockNo dirEntry
timestamp <- Sec.indexEntryTime `fmap` indexLookupFileEntry dirEntry file
return [CachePackageId pkgId blockNo timestamp]
-- (old) Preferred-versions file: performs two separate
-- 'indexLookupFileEntry' calls, one for the content and one for the time.
mk (dirEntry, _fp, Just (Sec.Some file@(Sec.IndexPkgPrefs _pkgName))) = do
let blockNo = Sec.directoryEntryBlockNo dirEntry
content <- Sec.indexEntryContent `fmap` indexLookupFileEntry dirEntry file
timestamp <- Sec.indexEntryTime `fmap` indexLookupFileEntry dirEntry file
return $ map (\x -> CachePreference x blockNo timestamp) (parsePreferredVersions content)
-- (old) Adapter: repackages an already looked-up index entry into the
-- (directory entry, path, parsed path) triple that 'mk' expects.
let mk2 :: (Sec.DirectoryEntry, Sec.Some Sec.IndexEntry)
-> (Sec.DirectoryEntry, Sec.IndexPath, Maybe (Sec.Some Sec.IndexFile))
mk2 (dent, Sec.Some sie) =
(dent, Sec.indexEntryPath sie, fmap Sec.Some (Sec.indexEntryPathParsed sie))
-- dirIdxEnts :: [(Sec.DirectoryEntry, Sec.Some Sec.IndexEntry)]
-- (old) Walks the index from 'Sec.directoryFirst' via 'indexLookupEntry',
-- then runs 'mk . mk2' on every entry and concatenates the results.
dirIdxEnts <- lazyUnfold indexLookupEntry (Sec.directoryFirst indexDirectory)
entriess <- lazySequence $ map (mk . mk2) dirIdxEnts
callback $ concat entriess
-- Incrementally (lazily) read all the entries in the tar file in order,
-- including all revisions, not just the last revision of each file
indexEntries <- lazyUnfold indexLookupEntry (Sec.directoryFirst indexDirectory)
-- (new) Flatten the per-entry results of 'toCacheEntries' into the single
-- list passed to the callback.
callback [ cacheEntry
| (dirEntry, indexEntry) <- indexEntries
, cacheEntry <- toCacheEntries dirEntry indexEntry ]
-- (new) Pure conversion of one already looked-up index entry into zero or
-- more cache entries. Time and content are read from the entry itself, so no
-- further 'indexLookupFileEntry' call is made here.
-- NOTE(review): the `where` keywords introducing this helper and the
-- 'blockNo' / 'timestamp' bindings, and the closing `]` of the comprehension
-- below, are not visible in this view -- presumably dropped during
-- extraction; confirm.
toCacheEntries :: Sec.DirectoryEntry -> Sec.Some Sec.IndexEntry
-> [IndexCacheEntry]
toCacheEntries dirEntry (Sec.Some sie) =
case Sec.indexEntryPathParsed sie of
Nothing -> [] -- skip unrecognized file
Just (Sec.IndexPkgMetadata _pkgId) -> [] -- skip metadata
-- 'force' is presumably Control.DeepSeq's 'force' (full evaluation of the
-- result list) -- TODO confirm from the module's imports.
Just (Sec.IndexPkgCabal pkgId) -> force
[CachePackageId pkgId blockNo timestamp]
-- One 'CachePreference' per dependency parsed from the file's content.
Just (Sec.IndexPkgPrefs _pkgName) -> force
[ CachePreference dep blockNo timestamp
| dep <- parsePreferredVersions (Sec.indexEntryContent sie)
-- Bindings shared by the .cabal and preferred-versions branches above.
blockNo = Sec.directoryEntryBlockNo dirEntry
timestamp = Sec.indexEntryTime sie
withIndexEntries index callback = do -- non-secure repositories
withFile (indexFile index) ReadMode $ \h -> do
bs <- maybeDecompress `fmap` BS.hGetContents h
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment