Commit dd898373 authored by Duncan Coutts's avatar Duncan Coutts Committed by Mikhail Glushenkov
Browse files

Use the SHA256 hashes from the TUF repo metadata in planning

During planning we calculate the nix-style package ids by hashing
everything. A crucial component of the package id hashes is the hash of
the package source, which is currently just the hash of the package tarball.

For packages from hackage repos we currently have to download the
tarballs after running the solver but before elaborating the install
plan. So we have the slightly surprising behaviour that doing
cabal (new-)build --dry-run will already download the packages.

With packages from repos that support the new TUF-based format we do
have hashes available up front as part of the repo metadata / index,
without having to download the tarballs and hash them.

This patch takes advantage of secure TUF repos to get the package source
hashes without having to download tarballs. For classic non-secure repos
it has the same old behaviour of downloading up front.

(cherry picked from commit 2e4533a0)
parent 8a1e9268
......@@ -20,6 +20,7 @@ module Distribution.Client.FetchUtils (
checkFetched,
-- ** specifically for repo packages
checkRepoTarballFetched,
fetchRepoTarball,
-- * fetching other things
......@@ -70,7 +71,10 @@ isFetched loc = case loc of
RemoteTarballPackage _uri local -> return (isJust local)
RepoTarballPackage repo pkgid _ -> doesFileExist (packageFile repo pkgid)
-- | Checks if the package has already been fetched (or does not need
-- fetching) and if so returns evidence in the form of a 'PackageLocation'
-- with a resolved local file location.
--
checkFetched :: UnresolvedPkgLoc
-> IO (Maybe ResolvedPkgLoc)
checkFetched loc = case loc of
......@@ -84,12 +88,20 @@ checkFetched loc = case loc of
return (Just $ RepoTarballPackage repo pkgid file)
RemoteTarballPackage _uri Nothing -> return Nothing
RepoTarballPackage repo pkgid Nothing -> do
let file = packageFile repo pkgid
exists <- doesFileExist file
if exists
then return (Just $ RepoTarballPackage repo pkgid file)
else return Nothing
RepoTarballPackage repo pkgid Nothing ->
fmap (fmap (RepoTarballPackage repo pkgid))
(checkRepoTarballFetched repo pkgid)
-- | Like 'checkFetched' but specialised to a 'RepoTarballPackage': check
-- whether the tarball for the given package is already present in the
-- local repo cache, and if so return its file path.
--
checkRepoTarballFetched :: Repo -> PackageId -> IO (Maybe FilePath)
checkRepoTarballFetched repo pkgid = do
    present <- doesFileExist tarball
    return (if present then Just tarball else Nothing)
  where
    tarball = packageFile repo pkgid
-- | Fetch a package if we don't have it already.
......
......@@ -21,7 +21,8 @@ module Distribution.Client.PackageHash (
HashValue,
hashValue,
showHashValue,
readFileHashValue
readFileHashValue,
hashFromTUF,
) where
import Distribution.Package
......@@ -40,6 +41,8 @@ import Distribution.Text
import Distribution.Client.Types
( InstalledPackageId )
import qualified Hackage.Security.Client as Sec
import qualified Crypto.Hash.SHA256 as SHA256
import qualified Data.ByteString.Base16 as Base16
import qualified Data.ByteString.Char8 as BS
......@@ -47,7 +50,6 @@ import qualified Data.ByteString.Lazy.Char8 as LBS
import qualified Data.Set as Set
import Data.Set (Set)
import Control.Monad (unless)
import Data.Maybe (catMaybes)
import Data.List (sortBy, intercalate)
import Data.Function (on)
......@@ -211,18 +213,37 @@ instance Binary HashValue where
put (HashValue digest) = put digest
get = do
digest <- get
unless (BS.length digest == 32) $ -- NB: valid for SHA256
fail "HashValue: bad digest"
-- Cannot do any sensible validation here. Although we use SHA256
-- for stuff we hash ourselves, we can also get hashes from TUF
-- and that can in principle use different hash functions in future.
return (HashValue digest)
-- | Hash a (lazy) chunk of data. The hash function is currently SHA256.
--
hashValue :: LBS.ByteString -> HashValue
hashValue content = HashValue (SHA256.hashlazy content)
-- | Render a 'HashValue' as a base16 (hex) string.
showHashValue :: HashValue -> String
showHashValue (HashValue digest) = BS.unpack $ Base16.encode digest
-- | Hash the entire content of a file. Uses SHA256.
--
readFileHashValue :: FilePath -> IO HashValue
readFileHashValue tarball =
    withBinaryFile tarball ReadMode $ \hnd -> do
      content <- LBS.hGetContents hnd
      -- hGetContents reads lazily, so force the hash with 'evaluate'
      -- before withBinaryFile closes the handle
      evaluate (hashValue content)
-- | Convert a hash from TUF metadata into a 'PackageSourceHash'.
--
-- Note that TUF hashes don't necessarily have to be SHA256, since it can
-- support new algorithms in future.
--
hashFromTUF :: Sec.Hash -> HashValue
hashFromTUF (Sec.Hash hashstr) =
    --TODO: [code cleanup] either we should get TUF to use raw bytestrings or
    -- perhaps we should also just use a base16 string as the internal rep.
    let (digest, remainder) = Base16.decode (BS.pack hashstr)
     in if not (BS.null digest) && BS.null remainder
          then HashValue digest
          else error "hashFromTUF: cannot decode base16 hash"
......@@ -75,6 +75,7 @@ import Distribution.Client.DistDirLayout
import Distribution.Client.SetupWrapper
import Distribution.Client.JobControl
import Distribution.Client.FetchUtils
import qualified Hackage.Security.Client as Sec
import Distribution.Client.PkgConfigDb
import Distribution.Client.Setup hiding (packageName, cabalVersion)
import Distribution.Utils.NubList
......@@ -122,12 +123,12 @@ import Control.Monad.State as State
import Control.Exception
import Data.List
import Data.Maybe
import Data.Either
import Data.Monoid
import Data.Function
import System.FilePath
import System.Directory (doesDirectoryExist)
------------------------------------------------------------------------------
-- * Elaborated install plan
------------------------------------------------------------------------------
......@@ -498,7 +499,7 @@ rebuildInstallPlan verbosity
sourcePackageHashes <-
rerunIfChanged verbosity fileMonitorSourceHashes
(map packageId $ InstallPlan.toList solverPlan) $
(packageLocationsSignature solverPlan) $
getPackageSourceHashes verbosity withRepoCtx solverPlan
defaultInstallDirs <- liftIO $ userInstallDirTemplates compiler
......@@ -667,6 +668,16 @@ recreateDirectory verbosity createParents dir = do
monitorFiles [monitorDirectoryExistence dir]
-- | The source locations of all the configured packages in the plan, used
-- as a change-monitoring signature for the package source hashes.
packageLocationsSignature :: SolverInstallPlan
                          -> [(PackageId, PackageLocation (Maybe FilePath))]
packageLocationsSignature plan =
    concatMap pkgLocation (InstallPlan.toList plan)
  where
    -- only configured packages contribute a location; other plan entries
    -- (e.g. pre-existing installed packages) have no source to hash
    pkgLocation (InstallPlan.Configured SolverPackage { solverPkgSource = pkg }) =
      [(packageId pkg, packageSource pkg)]
    pkgLocation _ = []
-- | Get the 'HashValue' for all the source packages where we use hashes,
-- and download any packages required to do so.
--
......@@ -676,51 +687,123 @@ getPackageSourceHashes :: Verbosity
-> (forall a. (RepoContext -> IO a) -> IO a)
-> SolverInstallPlan
-> Rebuild (Map PackageId PackageSourceHash)
getPackageSourceHashes verbosity withRepoCtx installPlan = do
getPackageSourceHashes verbosity withRepoCtx solverPlan = do
-- Determine which packages need fetching, and which are present already
-- Determine if and where to get the package's source hash from.
--
pkgslocs <- liftIO $ sequence
[ do let locm = packageSource pkg
mloc <- checkFetched locm
return (pkg, locm, mloc)
| InstallPlan.Configured
SolverPackage { solverPkgSource = pkg } <- InstallPlan.toList installPlan ]
let requireDownloading = [ (pkg, locm) | (pkg, locm, Nothing) <- pkgslocs ]
alreadyDownloaded = [ (pkg, loc) | (pkg, _, Just loc) <- pkgslocs ]
-- Download the ones we need
let allPkgLocations :: [(PackageId, PackageLocation (Maybe FilePath))]
allPkgLocations =
[ (packageId pkg, packageSource pkg)
| InstallPlan.Configured (SolverPackage { solverPkgSource = pkg})
<- InstallPlan.toList solverPlan ]
-- Tarballs that were local in the first place.
-- We'll hash these tarball files directly.
localTarballPkgs :: [(PackageId, FilePath)]
localTarballPkgs =
[ (pkgid, tarball)
| (pkgid, LocalTarballPackage tarball) <- allPkgLocations ]
-- Tarballs from remote URLs. We must have downloaded these already
-- (since we extracted the .cabal file earlier)
--TODO: [required eventually] finish remote tarball functionality
-- allRemoteTarballPkgs =
-- [ (pkgid, )
-- | (pkgid, RemoteTarballPackage ) <- allPkgLocations ]
-- Tarballs from repositories, either where the repository provides
-- hashes as part of the repo metadata, or where we will have to
-- download and hash the tarball.
repoTarballPkgsWithMetadata :: [(PackageId, Repo)]
repoTarballPkgsWithoutMetadata :: [(PackageId, Repo)]
(repoTarballPkgsWithMetadata,
repoTarballPkgsWithoutMetadata) =
partitionEithers
[ case repo of
RepoSecure{} -> Left (pkgid, repo)
_ -> Right (pkgid, repo)
| (pkgid, RepoTarballPackage repo _ _) <- allPkgLocations ]
-- For tarballs from repos that do not have hashes available we now have
-- to check if the packages were downloaded already.
--
newlyDownloaded <-
if null requireDownloading
then return []
else liftIO $
withRepoCtx $ \repoctx ->
(repoTarballPkgsToDownload,
repoTarballPkgsDownloaded)
<- fmap partitionEithers $
liftIO $ sequence
[ do mtarball <- checkRepoTarballFetched repo pkgid
case mtarball of
Nothing -> return (Left (pkgid, repo))
Just tarball -> return (Right (pkgid, tarball))
| (pkgid, repo) <- repoTarballPkgsWithoutMetadata ]
(hashesFromRepoMetadata,
repoTarballPkgsNewlyDownloaded) <-
-- Avoid having to initialise the repository (ie 'withRepoCtx') if we
-- don't have to. (The main cost is configuring the http client.)
if null repoTarballPkgsToDownload && null repoTarballPkgsWithMetadata
then return (Map.empty, [])
else liftIO $ withRepoCtx $ \repoctx -> do
-- For tarballs from repos that do have hashes available as part of the
-- repo metadata we now load up the index for each repo and retrieve
-- the hashes for the packages
--
hashesFromRepoMetadata <-
Sec.uncheckClientErrors $ --TODO: [code cleanup] wrap in our own exceptions
fmap (Map.fromList . concat) $
sequence
-- Reading the repo index is expensive so we group the packages by repo
[ repoContextWithSecureRepo repoctx repo $ \secureRepo ->
Sec.withIndex secureRepo $ \repoIndex ->
sequence
[ do loc <- fetchPackage verbosity repoctx locm
return (pkg, loc)
| (pkg, locm) <- requireDownloading ]
[ do hash <- Sec.trusted <$> -- strip off Trusted tag
Sec.indexLookupHash repoIndex pkgid
-- Note that hackage-security currently uses SHA256
-- but this API could in principle give us some other
-- choice in future.
return (pkgid, hashFromTUF hash)
| pkgid <- pkgids ]
| (repo, pkgids) <-
map (\grp@((_,repo):_) -> (repo, map fst grp))
. groupBy ((==) `on` (remoteRepoName . repoRemote . snd))
. sortBy (compare `on` (remoteRepoName . repoRemote . snd))
$ repoTarballPkgsWithMetadata
]
-- Get the hashes of all the tarball packages (i.e. not local dir pkgs)
-- For tarballs from repos that do not have hashes available, download
-- the ones we previously determined we need.
--
repoTarballPkgsNewlyDownloaded <-
sequence
[ do tarball <- fetchRepoTarball verbosity repoctx repo pkgid
return (pkgid, tarball)
| (pkgid, repo) <- repoTarballPkgsToDownload ]
return (hashesFromRepoMetadata,
repoTarballPkgsNewlyDownloaded)
-- Hash tarball files for packages where we have to do that. This includes
-- tarballs that were local in the first place, plus tarballs from repos,
-- either previously cached or freshly downloaded.
--
let pkgsTarballs =
[ (packageId pkg, tarball)
| (pkg, srcloc) <- newlyDownloaded ++ alreadyDownloaded
, tarball <- maybeToList (tarballFileLocation srcloc) ]
monitorFiles [ monitorFile tarball | (_pkgid, tarball) <- pkgsTarballs ]
liftM Map.fromList $ liftIO $
let allTarballFilePkgs :: [(PackageId, FilePath)]
allTarballFilePkgs = localTarballPkgs
++ repoTarballPkgsDownloaded
++ repoTarballPkgsNewlyDownloaded
hashesFromTarballFiles <- liftIO $
fmap Map.fromList $
sequence
[ do srchash <- readFileHashValue tarball
return (pkgid, srchash)
| (pkgid, tarball) <- pkgsTarballs ]
where
tarballFileLocation (LocalUnpackedPackage _dir) = Nothing
tarballFileLocation (LocalTarballPackage tarball) = Just tarball
tarballFileLocation (RemoteTarballPackage _ tarball) = Just tarball
tarballFileLocation (RepoTarballPackage _ _ tarball) = Just tarball
| (pkgid, tarball) <- allTarballFilePkgs
]
monitorFiles [ monitorFile tarball
| (_pkgid, tarball) <- allTarballFilePkgs ]
-- Return the combination
return $! hashesFromRepoMetadata
<> hashesFromTarballFiles
-- ------------------------------------------------------------
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment