Commit 37712426 authored by Mikhail Glushenkov's avatar Mikhail Glushenkov
Browse files

Merge pull request #3372 from 23Skidoo/master-3370

Port #3370 to master
parents 8a1e9268 dd898373
......@@ -20,6 +20,7 @@ module Distribution.Client.FetchUtils (
checkFetched,
-- ** specifically for repo packages
checkRepoTarballFetched,
fetchRepoTarball,
-- * fetching other things
......@@ -70,7 +71,10 @@ isFetched loc = case loc of
RemoteTarballPackage _uri local -> return (isJust local)
RepoTarballPackage repo pkgid _ -> doesFileExist (packageFile repo pkgid)
-- | Checks if the package has already been fetched (or does not need
-- fetching) and if so returns evidence in the form of a 'PackageLocation'
-- with a resolved local file location.
--
checkFetched :: UnresolvedPkgLoc
-> IO (Maybe ResolvedPkgLoc)
checkFetched loc = case loc of
......@@ -84,12 +88,20 @@ checkFetched loc = case loc of
return (Just $ RepoTarballPackage repo pkgid file)
RemoteTarballPackage _uri Nothing -> return Nothing
RepoTarballPackage repo pkgid Nothing -> do
let file = packageFile repo pkgid
exists <- doesFileExist file
if exists
then return (Just $ RepoTarballPackage repo pkgid file)
else return Nothing
RepoTarballPackage repo pkgid Nothing ->
fmap (fmap (RepoTarballPackage repo pkgid))
(checkRepoTarballFetched repo pkgid)
-- | Like 'checkFetched' but for the specific case of a 'RepoTarballPackage'.
--
checkRepoTarballFetched :: Repo -> PackageId -> IO (Maybe FilePath)
checkRepoTarballFetched repo pkgid = do
    -- The expected cache location for this repo's tarball of the package.
    present <- doesFileExist tarball
    return (if present then Just tarball else Nothing)
  where
    tarball = packageFile repo pkgid
-- | Fetch a package if we don't have it already.
......
......@@ -21,7 +21,8 @@ module Distribution.Client.PackageHash (
HashValue,
hashValue,
showHashValue,
readFileHashValue
readFileHashValue,
hashFromTUF,
) where
import Distribution.Package
......@@ -40,6 +41,8 @@ import Distribution.Text
import Distribution.Client.Types
( InstalledPackageId )
import qualified Hackage.Security.Client as Sec
import qualified Crypto.Hash.SHA256 as SHA256
import qualified Data.ByteString.Base16 as Base16
import qualified Data.ByteString.Char8 as BS
......@@ -47,7 +50,6 @@ import qualified Data.ByteString.Lazy.Char8 as LBS
import qualified Data.Set as Set
import Data.Set (Set)
import Control.Monad (unless)
import Data.Maybe (catMaybes)
import Data.List (sortBy, intercalate)
import Data.Function (on)
......@@ -211,18 +213,37 @@ instance Binary HashValue where
put (HashValue digest) = put digest
get = do
digest <- get
unless (BS.length digest == 32) $ -- NB: valid for SHA256
fail "HashValue: bad digest"
-- Cannot do any sensible validation here. Although we use SHA256
-- for stuff we hash ourselves, we can also get hashes from TUF
-- and that can in principle use different hash functions in future.
return (HashValue digest)
-- | Hash some data. Currently uses SHA256.
--
hashValue :: LBS.ByteString -> HashValue
hashValue bytes = HashValue (SHA256.hashlazy bytes)
-- | Render a 'HashValue' as a base16 (hex) 'String'.
showHashValue :: HashValue -> String
showHashValue (HashValue raw) = BS.unpack (Base16.encode raw)
-- | Hash the content of a file. Uses SHA256.
--
readFileHashValue :: FilePath -> IO HashValue
readFileHashValue path =
    withBinaryFile path ReadMode $ \hnd -> do
      -- Force the hash before the handle is closed, since hGetContents
      -- reads the file lazily.
      contents <- LBS.hGetContents hnd
      evaluate (hashValue contents)
-- | Convert a hash from TUF metadata into a 'PackageSourceHash'.
--
-- Note that TUF hashes don't necessarily have to be SHA256, since it can
-- support new algorithms in future.
--
hashFromTUF :: Sec.Hash -> HashValue
hashFromTUF (Sec.Hash hashstr) =
    --TODO: [code cleanup] either we should get TUF to use raw bytestrings or
    -- perhaps we should also just use a base16 string as the internal rep.
    case Base16.decode (BS.pack hashstr) of
      (digest, leftover)
        | not (BS.null digest) && BS.null leftover -> HashValue digest
      _ -> error "hashFromTUF: cannot decode base16 hash"
......@@ -75,6 +75,7 @@ import Distribution.Client.DistDirLayout
import Distribution.Client.SetupWrapper
import Distribution.Client.JobControl
import Distribution.Client.FetchUtils
import qualified Hackage.Security.Client as Sec
import Distribution.Client.PkgConfigDb
import Distribution.Client.Setup hiding (packageName, cabalVersion)
import Distribution.Utils.NubList
......@@ -122,12 +123,12 @@ import Control.Monad.State as State
import Control.Exception
import Data.List
import Data.Maybe
import Data.Either
import Data.Monoid
import Data.Function
import System.FilePath
import System.Directory (doesDirectoryExist)
------------------------------------------------------------------------------
-- * Elaborated install plan
------------------------------------------------------------------------------
......@@ -498,7 +499,7 @@ rebuildInstallPlan verbosity
sourcePackageHashes <-
rerunIfChanged verbosity fileMonitorSourceHashes
(map packageId $ InstallPlan.toList solverPlan) $
(packageLocationsSignature solverPlan) $
getPackageSourceHashes verbosity withRepoCtx solverPlan
defaultInstallDirs <- liftIO $ userInstallDirTemplates compiler
......@@ -667,6 +668,16 @@ recreateDirectory verbosity createParents dir = do
monitorFiles [monitorDirectoryExistence dir]
-- | The package ids and source locations of all configured packages in the
-- install plan; used as the change signature for the package source hashes.
packageLocationsSignature :: SolverInstallPlan
                          -> [(PackageId, PackageLocation (Maybe FilePath))]
packageLocationsSignature solverPlan =
    [ (packageId srcpkg, packageSource srcpkg)
    | InstallPlan.Configured
        SolverPackage { solverPkgSource = srcpkg }
          <- InstallPlan.toList solverPlan
    ]
-- | Get the 'HashValue' for all the source packages where we use hashes,
-- and download any packages required to do so.
--
......@@ -676,51 +687,123 @@ getPackageSourceHashes :: Verbosity
-> (forall a. (RepoContext -> IO a) -> IO a)
-> SolverInstallPlan
-> Rebuild (Map PackageId PackageSourceHash)
getPackageSourceHashes verbosity withRepoCtx installPlan = do
getPackageSourceHashes verbosity withRepoCtx solverPlan = do
-- Determine which packages need fetching, and which are present already
-- Determine if and where to get the package's source hash from.
--
pkgslocs <- liftIO $ sequence
[ do let locm = packageSource pkg
mloc <- checkFetched locm
return (pkg, locm, mloc)
| InstallPlan.Configured
SolverPackage { solverPkgSource = pkg } <- InstallPlan.toList installPlan ]
let requireDownloading = [ (pkg, locm) | (pkg, locm, Nothing) <- pkgslocs ]
alreadyDownloaded = [ (pkg, loc) | (pkg, _, Just loc) <- pkgslocs ]
-- Download the ones we need
let allPkgLocations :: [(PackageId, PackageLocation (Maybe FilePath))]
allPkgLocations =
[ (packageId pkg, packageSource pkg)
| InstallPlan.Configured (SolverPackage { solverPkgSource = pkg})
<- InstallPlan.toList solverPlan ]
-- Tarballs that were local in the first place.
-- We'll hash these tarball files directly.
localTarballPkgs :: [(PackageId, FilePath)]
localTarballPkgs =
[ (pkgid, tarball)
| (pkgid, LocalTarballPackage tarball) <- allPkgLocations ]
-- Tarballs from remote URLs. We must have downloaded these already
-- (since we extracted the .cabal file earlier)
--TODO: [required eventually] finish remote tarball functionality
-- allRemoteTarballPkgs =
-- [ (pkgid, )
-- | (pkgid, RemoteTarballPackage ) <- allPkgLocations ]
-- Tarballs from repositories, either where the repository provides
-- hashes as part of the repo metadata, or where we will have to
-- download and hash the tarball.
repoTarballPkgsWithMetadata :: [(PackageId, Repo)]
repoTarballPkgsWithoutMetadata :: [(PackageId, Repo)]
(repoTarballPkgsWithMetadata,
repoTarballPkgsWithoutMetadata) =
partitionEithers
[ case repo of
RepoSecure{} -> Left (pkgid, repo)
_ -> Right (pkgid, repo)
| (pkgid, RepoTarballPackage repo _ _) <- allPkgLocations ]
-- For tarballs from repos that do not have hashes available we now have
-- to check if the packages were downloaded already.
--
newlyDownloaded <-
if null requireDownloading
then return []
else liftIO $
withRepoCtx $ \repoctx ->
(repoTarballPkgsToDownload,
repoTarballPkgsDownloaded)
<- fmap partitionEithers $
liftIO $ sequence
[ do mtarball <- checkRepoTarballFetched repo pkgid
case mtarball of
Nothing -> return (Left (pkgid, repo))
Just tarball -> return (Right (pkgid, tarball))
| (pkgid, repo) <- repoTarballPkgsWithoutMetadata ]
(hashesFromRepoMetadata,
repoTarballPkgsNewlyDownloaded) <-
-- Avoid having to initialise the repository (ie 'withRepoCtx') if we
-- don't have to. (The main cost is configuring the http client.)
if null repoTarballPkgsToDownload && null repoTarballPkgsWithMetadata
then return (Map.empty, [])
else liftIO $ withRepoCtx $ \repoctx -> do
-- For tarballs from repos that do have hashes available as part of the
-- repo metadata we now load up the index for each repo and retrieve
-- the hashes for the packages
--
hashesFromRepoMetadata <-
Sec.uncheckClientErrors $ --TODO: [code cleanup] wrap in our own exceptions
fmap (Map.fromList . concat) $
sequence
-- Reading the repo index is expensive so we group the packages by repo
[ repoContextWithSecureRepo repoctx repo $ \secureRepo ->
Sec.withIndex secureRepo $ \repoIndex ->
sequence
[ do loc <- fetchPackage verbosity repoctx locm
return (pkg, loc)
| (pkg, locm) <- requireDownloading ]
[ do hash <- Sec.trusted <$> -- strip off Trusted tag
Sec.indexLookupHash repoIndex pkgid
-- Note that hackage-security currently uses SHA256
-- but this API could in principle give us some other
-- choice in future.
return (pkgid, hashFromTUF hash)
| pkgid <- pkgids ]
| (repo, pkgids) <-
map (\grp@((_,repo):_) -> (repo, map fst grp))
. groupBy ((==) `on` (remoteRepoName . repoRemote . snd))
. sortBy (compare `on` (remoteRepoName . repoRemote . snd))
$ repoTarballPkgsWithMetadata
]
-- Get the hashes of all the tarball packages (i.e. not local dir pkgs)
-- For tarballs from repos that do not have hashes available, download
-- the ones we previously determined we need.
--
repoTarballPkgsNewlyDownloaded <-
sequence
[ do tarball <- fetchRepoTarball verbosity repoctx repo pkgid
return (pkgid, tarball)
| (pkgid, repo) <- repoTarballPkgsToDownload ]
return (hashesFromRepoMetadata,
repoTarballPkgsNewlyDownloaded)
-- Hash tarball files for packages where we have to do that. This includes
-- tarballs that were local in the first place, plus tarballs from repos,
-- either previously cached or freshly downloaded.
--
let pkgsTarballs =
[ (packageId pkg, tarball)
| (pkg, srcloc) <- newlyDownloaded ++ alreadyDownloaded
, tarball <- maybeToList (tarballFileLocation srcloc) ]
monitorFiles [ monitorFile tarball | (_pkgid, tarball) <- pkgsTarballs ]
liftM Map.fromList $ liftIO $
let allTarballFilePkgs :: [(PackageId, FilePath)]
allTarballFilePkgs = localTarballPkgs
++ repoTarballPkgsDownloaded
++ repoTarballPkgsNewlyDownloaded
hashesFromTarballFiles <- liftIO $
fmap Map.fromList $
sequence
[ do srchash <- readFileHashValue tarball
return (pkgid, srchash)
| (pkgid, tarball) <- pkgsTarballs ]
where
tarballFileLocation (LocalUnpackedPackage _dir) = Nothing
tarballFileLocation (LocalTarballPackage tarball) = Just tarball
tarballFileLocation (RemoteTarballPackage _ tarball) = Just tarball
tarballFileLocation (RepoTarballPackage _ _ tarball) = Just tarball
| (pkgid, tarball) <- allTarballFilePkgs
]
monitorFiles [ monitorFile tarball
| (_pkgid, tarball) <- allTarballFilePkgs ]
-- Return the combination
return $! hashesFromRepoMetadata
<> hashesFromTarballFiles
-- ------------------------------------------------------------
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment