From 1beba1bbf58d61d5fdafd49696001fe344143fba Mon Sep 17 00:00:00 2001 From: Edsko de Vries <edsko@well-typed.com> Date: Sat, 28 Feb 2015 16:28:04 +0000 Subject: [PATCH] Document unused graph traversal functions Both cabal-install and `Cabal` define a notion of a package index. `Cabal` defines data PackageIndex a = PackageIndex !(Map InstalledPackageId a) !(Map PackageName (Map Version [a])) whereas `cabal-install` defines newtype PackageIndex pkg = PackageIndex (Map PackageName [pkg]) Note that Cabal.PackageIndex is indexed by installed package IDs, whereas CabalInstall.PackageIndex is indexed by package names. There are a bunch of "graph traversal" functions that similarly duplicated between `Cabal` and `cabal-install`; in `Cabal` we have brokenPackages :: PackageInstalled a => PackageIndex a -> [(a, [InstalledPackageId])] dependencyClosure :: PackageInstalled a => PackageIndex a -> [InstalledPackageId] -> Either (PackageIndex a) [(a, [InstalledPackageId])] dependencyCycles :: PackageInstalled a => PackageIndex a -> [[a]] dependencyGraph :: PackageInstalled a => PackageIndex a -> (Graph.Graph, Graph.Vertex -> a, InstalledPackageId -> Maybe Graph.Vertex) dependencyInconsistencies :: PackageInstalled a => PackageIndex a -> [(PackageName, [(PackageId, Version)])] reverseDependencyClosure :: PackageInstalled a => PackageIndex a -> [InstalledPackageId] -> [a] reverseTopologicalOrder :: PackageInstalled a => PackageIndex a -> [a] topologicalOrder :: PackageInstalled a => PackageIndex a -> [a] which are mirrored in `cabal-install` as brokenPackages :: PackageFixedDeps pkg => PackageIndex pkg -> [(pkg, [PackageIdentifier])] dependencyClosure :: PackageFixedDeps pkg => PackageIndex pkg -> [PackageIdentifier] -> Either (PackageIndex pkg) [(pkg, [PackageIdentifier])] dependencyCycles :: PackageFixedDeps pkg => PackageIndex pkg -> [[pkg]] dependencyGraph :: PackageFixedDeps pkg => PackageIndex pkg -> (Graph.Graph, Graph.Vertex -> pkg, PackageIdentifier -> Maybe Graph.Vertex) dependencyInconsistencies :: PackageFixedDeps pkg => PackageIndex pkg -> [(PackageName, [(PackageIdentifier, Version)])] reverseDependencyClosure :: PackageFixedDeps pkg => PackageIndex pkg -> [PackageIdentifier] -> [pkg] reverseTopologicalOrder :: PackageFixedDeps pkg => PackageIndex pkg -> [pkg] topologicalOrder :: PackageFixedDeps pkg => PackageIndex pkg -> [pkg] This by itself makes a certain amount of sense, but here's where the situation gets confusing. `cabal-install` defines a `PlanIndex` as type PlanIndex = Cabal.PackageIndex PlanPackage Note that is using `Cabal`'s notion of a PackageIndex, not `cabal-install`'s; it makes sense that a PlanIndex is indexed by installed package IDs rather than package names (even if currently we have to fake installed package IDs. Almost all of the functions listed above, however, are only called on `PlanIndex`s. This means that we invoke the functions from `Cabal`, not the functions from `cabal-install`; in fact, almost all these functions in `cabal-install` are completely unused right now. In `cabal-install` but calls from `Cabal` ---------------------------------------------------------- closed brokenPackages acyclic dependencyCycles consistent dependencyInconsistencies problems brokenPackages', dependencyCycles', dependencyInconsistencies' This is more than just a code clean-up issue. As mentioned in the previous PR, the fundamental difference between Cabal and cabal-install is their view of dependencies: Cabal knows only about installed libraries and their library dependencies, whereas cabal knows about packages and the dependencies of their setup scripts, executables, test-suites, benchmarks, as well as their library dependencies. By calling the graph-traversal functions from `Cabal` rather than from `cabal-install`, any of these additional dependencies are either completely ignored, or else the distinction is lost (depending on how we implemented installedDepends for plan packages); and neither option is correct. For example, in `new` from Distribution.Client.InstallPlan (in `cabal-install`) we call `dependendyGraph` on the plan index; since the plan index is defined in terms of Cabal's plan index, we call Cabal's `dependencyGraph` here, but that means that this graph will completely lack any setup dependencies. The reverse graph is used in (only one place): `packagedThatDependOn`, which in turn is (only) used in `failed`. But this is wrong: if a package fails to install, if another package depends on it through a setup dependency, then that second package should also be marked as impossible to install. What needs to happen is that we modify the graph traversal functions from `cabal-install` to take a PackageIndex from `Cabal` (so that we can apply them to a PlanIndex), but use the dependencies from `FixedPackageDeps` rather than the flat or incomplete dependencies we get from `PackageInstalled`. In fact, the whole `PackageInstalled` instance for `ConfiguredPackage`, `ReadyPackage` and `PlanPackage` should go: returning only part of the dependencies, or else all dependencies flattened, is just too error prone. This first commit only documents the problem (this commit message) and moves the above functions to a new module called Distribution.Client.PlanIndex. Cleaning this up is complicated by the fact that we _do_ still call two of the above functions on a `CabalInstall.PackageIndex`: * `pruneInstallPlan` from `Distribution.Client.Freeze` calls `dependencyClosure` * The top-down solver calls `dependencyGraph` If we change the above functions to work on a `Cabal.PackageIndex` instead these two exceptions will break, so we need to look at that first. --- .../Distribution/Client/Dependency/TopDown.hs | 5 +- cabal-install/Distribution/Client/Freeze.hs | 3 +- .../Distribution/Client/PackageIndex.hs | 191 +--------------- .../Distribution/Client/PlanIndex.hs | 205 ++++++++++++++++++ 4 files changed, 214 insertions(+), 190 deletions(-) create mode 100644 cabal-install/Distribution/Client/PlanIndex.hs diff --git a/cabal-install/Distribution/Client/Dependency/TopDown.hs b/cabal-install/Distribution/Client/Dependency/TopDown.hs index 9c6cf314d5..f76943eb88 100644 --- a/cabal-install/Distribution/Client/Dependency/TopDown.hs +++ b/cabal-install/Distribution/Client/Dependency/TopDown.hs @@ -33,6 +33,7 @@ import Distribution.Client.Dependency.Types , Progress(..), foldProgress ) import qualified Distribution.Client.PackageIndex as PackageIndex +import qualified Distribution.Client.PlanIndex as PlanIndex import Distribution.Client.PackageIndex ( PackageIndex, PackageFixedDeps(depends) ) import Distribution.Package @@ -424,7 +425,7 @@ annotateInstalledPackages dfsNumber installed = PackageIndex.fromList transitiveDepends :: InstalledPackage -> [PackageId] transitiveDepends = map (packageId . toPkg) . tail . Graph.reachable graph . fromJust . toVertex . packageId - (graph, toPkg, toVertex) = PackageIndex.dependencyGraph installed + (graph, toPkg, toVertex) = PlanIndex.dependencyGraph installed -- | Annotate each available packages with its topological sort number and any @@ -667,7 +668,7 @@ improvePlan installed constraints0 selected0 = . Graph.topSort . Graph.transposeG $ graph - where (graph, toPkg, _) = PackageIndex.dependencyGraph index + where (graph, toPkg, _) = PlanIndex.dependencyGraph index -- ------------------------------------------------------------ -- * Adding and recording constraints diff --git a/cabal-install/Distribution/Client/Freeze.hs b/cabal-install/Distribution/Client/Freeze.hs index a33c0aaac7..cd2b4e1bc2 100644 --- a/cabal-install/Distribution/Client/Freeze.hs +++ b/cabal-install/Distribution/Client/Freeze.hs @@ -39,6 +39,7 @@ import Distribution.Simple.Compiler ( Compiler, compilerInfo, PackageDBStack ) import Distribution.Simple.PackageIndex (InstalledPackageIndex) import qualified Distribution.Client.PackageIndex as PackageIndex +import qualified Distribution.Client.PlanIndex as PlanIndex import Distribution.Simple.Program ( ProgramConfiguration ) import Distribution.Simple.Setup @@ -196,7 +197,7 @@ pruneInstallPlan :: InstallPlan.InstallPlan -> Either [PlanPackage] [(PlanPackage, [PackageIdentifier])] pruneInstallPlan installPlan pkgSpecifiers = mapLeft (removeSelf pkgIds . PackageIndex.allPackages) $ - PackageIndex.dependencyClosure pkgIdx pkgIds + PlanIndex.dependencyClosure pkgIdx pkgIds where pkgIdx = PackageIndex.fromList $ InstallPlan.toList installPlan pkgIds = [ packageId pkg | SpecificSourcePackage pkg <- pkgSpecifiers ] diff --git a/cabal-install/Distribution/Client/PackageIndex.hs b/cabal-install/Distribution/Client/PackageIndex.hs index 3a4a974dd1..39122a6a43 100644 --- a/cabal-install/Distribution/Client/PackageIndex.hs +++ b/cabal-install/Distribution/Client/PackageIndex.hs @@ -44,39 +44,25 @@ module Distribution.Client.PackageIndex ( -- ** Bulk queries allPackages, allPackagesByName, - - -- ** Special queries - brokenPackages, - dependencyClosure, - reverseDependencyClosure, - topologicalOrder, - reverseTopologicalOrder, - dependencyInconsistencies, - dependencyCycles, - dependencyGraph, ) where import Prelude hiding (lookup) import Control.Exception (assert) import qualified Data.Map as Map import Data.Map (Map) -import qualified Data.Tree as Tree -import qualified Data.Graph as Graph -import qualified Data.Array as Array -import Data.Array ((!)) -import Data.List (groupBy, sortBy, nub, isInfixOf) +import Data.List (groupBy, sortBy, isInfixOf) #if !MIN_VERSION_base(4,8,0) import Data.Monoid (Monoid(..)) #endif -import Data.Maybe (isJust, isNothing, fromMaybe, catMaybes) +import Data.Maybe (isJust, fromMaybe) import Distribution.Package ( PackageName(..), PackageIdentifier(..) , Package(..), packageName, packageVersion , Dependency(Dependency) ) import Distribution.Version - ( Version, withinRange ) -import Distribution.Simple.Utils (lowercase, equating, comparing) + ( withinRange ) +import Distribution.Simple.Utils (lowercase, comparing) -- | Subclass of packages that have specific versioned dependencies. -- @@ -330,172 +316,3 @@ searchByNameSubstring (PackageIndex m) searchterm = , lsearchterm `isInfixOf` lowercase name ] where lsearchterm = lowercase searchterm - --- --- * Special queries --- - --- | All packages that have dependencies that are not in the index. --- --- Returns such packages along with the dependencies that they're missing. --- -brokenPackages :: PackageFixedDeps pkg - => PackageIndex pkg - -> [(pkg, [PackageIdentifier])] -brokenPackages index = - [ (pkg, missing) - | pkg <- allPackages index - , let missing = [ pkg' | pkg' <- depends pkg - , isNothing (lookupPackageId index pkg') ] - , not (null missing) ] - --- | Tries to take the transitive closure of the package dependencies. --- --- If the transitive closure is complete then it returns that subset of the --- index. Otherwise it returns the broken packages as in 'brokenPackages'. --- --- * Note that if the result is @Right []@ it is because at least one of --- the original given 'PackageIdentifier's do not occur in the index. --- -dependencyClosure :: PackageFixedDeps pkg - => PackageIndex pkg - -> [PackageIdentifier] - -> Either (PackageIndex pkg) - [(pkg, [PackageIdentifier])] -dependencyClosure index pkgids0 = case closure mempty [] pkgids0 of - (completed, []) -> Left completed - (completed, _) -> Right (brokenPackages completed) - where - closure completed failed [] = (completed, failed) - closure completed failed (pkgid:pkgids) = case lookupPackageId index pkgid of - Nothing -> closure completed (pkgid:failed) pkgids - Just pkg -> case lookupPackageId completed (packageId pkg) of - Just _ -> closure completed failed pkgids - Nothing -> closure completed' failed pkgids' - where completed' = insert pkg completed - pkgids' = depends pkg ++ pkgids - --- | Takes the transitive closure of the packages reverse dependencies. --- --- * The given 'PackageIdentifier's must be in the index. --- -reverseDependencyClosure :: PackageFixedDeps pkg - => PackageIndex pkg - -> [PackageIdentifier] - -> [pkg] -reverseDependencyClosure index = - map vertexToPkg - . concatMap Tree.flatten - . Graph.dfs reverseDepGraph - . map (fromMaybe noSuchPkgId . pkgIdToVertex) - - where - (depGraph, vertexToPkg, pkgIdToVertex) = dependencyGraph index - reverseDepGraph = Graph.transposeG depGraph - noSuchPkgId = error "reverseDependencyClosure: package is not in the graph" - -topologicalOrder :: PackageFixedDeps pkg => PackageIndex pkg -> [pkg] -topologicalOrder index = map toPkgId - . Graph.topSort - $ graph - where (graph, toPkgId, _) = dependencyGraph index - -reverseTopologicalOrder :: PackageFixedDeps pkg => PackageIndex pkg -> [pkg] -reverseTopologicalOrder index = map toPkgId - . Graph.topSort - . Graph.transposeG - $ graph - where (graph, toPkgId, _) = dependencyGraph index - --- | Given a package index where we assume we want to use all the packages --- (use 'dependencyClosure' if you need to get such a index subset) find out --- if the dependencies within it use consistent versions of each package. --- Return all cases where multiple packages depend on different versions of --- some other package. --- --- Each element in the result is a package name along with the packages that --- depend on it and the versions they require. These are guaranteed to be --- distinct. --- -dependencyInconsistencies :: PackageFixedDeps pkg - => PackageIndex pkg - -> [(PackageName, [(PackageIdentifier, Version)])] -dependencyInconsistencies index = - [ (name, inconsistencies) - | (name, uses) <- Map.toList inverseIndex - , let inconsistencies = duplicatesBy uses - versions = map snd inconsistencies - , reallyIsInconsistent name (nub versions) ] - - where inverseIndex = Map.fromListWith (++) - [ (packageName dep, [(packageId pkg, packageVersion dep)]) - | pkg <- allPackages index - , dep <- depends pkg ] - - duplicatesBy = (\groups -> if length groups == 1 - then [] - else concat groups) - . groupBy (equating snd) - . sortBy (comparing snd) - - reallyIsInconsistent :: PackageName -> [Version] -> Bool - reallyIsInconsistent _ [] = False - reallyIsInconsistent name [v1, v2] = - case (mpkg1, mpkg2) of - (Just pkg1, Just pkg2) -> pkgid1 `notElem` depends pkg2 - && pkgid2 `notElem` depends pkg1 - _ -> True - where - pkgid1 = PackageIdentifier name v1 - pkgid2 = PackageIdentifier name v2 - mpkg1 = lookupPackageId index pkgid1 - mpkg2 = lookupPackageId index pkgid2 - - reallyIsInconsistent _ _ = True - --- | Find if there are any cycles in the dependency graph. If there are no --- cycles the result is @[]@. --- --- This actually computes the strongly connected components. So it gives us a --- list of groups of packages where within each group they all depend on each --- other, directly or indirectly. --- -dependencyCycles :: PackageFixedDeps pkg - => PackageIndex pkg - -> [[pkg]] -dependencyCycles index = - [ vs | Graph.CyclicSCC vs <- Graph.stronglyConnComp adjacencyList ] - where - adjacencyList = [ (pkg, packageId pkg, depends pkg) - | pkg <- allPackages index ] - --- | Builds a graph of the package dependencies. --- --- Dependencies on other packages that are not in the index are discarded. --- You can check if there are any such dependencies with 'brokenPackages'. --- -dependencyGraph :: PackageFixedDeps pkg - => PackageIndex pkg - -> (Graph.Graph, - Graph.Vertex -> pkg, - PackageIdentifier -> Maybe Graph.Vertex) -dependencyGraph index = (graph, vertexToPkg, pkgIdToVertex) - where - graph = Array.listArray bounds $ - map (catMaybes . map pkgIdToVertex . depends) pkgs - vertexToPkg vertex = pkgTable ! vertex - pkgIdToVertex = binarySearch 0 topBound - - pkgTable = Array.listArray bounds pkgs - pkgIdTable = Array.listArray bounds (map packageId pkgs) - pkgs = sortBy (comparing packageId) (allPackages index) - topBound = length pkgs - 1 - bounds = (0, topBound) - - binarySearch a b key - | a > b = Nothing - | otherwise = case compare key (pkgIdTable ! mid) of - LT -> binarySearch a (mid-1) key - EQ -> Just mid - GT -> binarySearch (mid+1) b key - where mid = (a + b) `div` 2 diff --git a/cabal-install/Distribution/Client/PlanIndex.hs b/cabal-install/Distribution/Client/PlanIndex.hs new file mode 100644 index 0000000000..d50affdc98 --- /dev/null +++ b/cabal-install/Distribution/Client/PlanIndex.hs @@ -0,0 +1,205 @@ +{-# LANGUAGE CPP #-} +module Distribution.Client.PlanIndex ( + brokenPackages + , dependencyClosure + , dependencyCycles + , dependencyGraph + , dependencyInconsistencies + , reverseDependencyClosure + , reverseTopologicalOrder + , topologicalOrder + ) where + +import Prelude hiding (lookup) +import qualified Data.Map as Map +import qualified Data.Tree as Tree +import qualified Data.Graph as Graph +import qualified Data.Array as Array +import Data.Array ((!)) +import Data.List (groupBy, sortBy, nub) +import Data.Maybe (isNothing, fromMaybe, catMaybes) + +#if !MIN_VERSION_base(4,8,0) +import Data.Monoid (Monoid(..)) +#endif + +import Distribution.Package + ( PackageName(..), PackageIdentifier(..) + , Package(..), packageName, packageVersion + ) +import Distribution.Version + ( Version ) +import Distribution.Simple.Utils (equating, comparing) + +import Distribution.Client.PackageIndex + ( PackageFixedDeps(..) ) +import Distribution.Client.PackageIndex + ( PackageIndex, lookupPackageId, allPackages, insert ) + +-- | All packages that have dependencies that are not in the index. +-- +-- Returns such packages along with the dependencies that they're missing. +-- +brokenPackages :: PackageFixedDeps pkg + => PackageIndex pkg + -> [(pkg, [PackageIdentifier])] +brokenPackages index = + [ (pkg, missing) + | pkg <- allPackages index + , let missing = [ pkg' | pkg' <- depends pkg + , isNothing (lookupPackageId index pkg') ] + , not (null missing) ] + +-- | Given a package index where we assume we want to use all the packages +-- (use 'dependencyClosure' if you need to get such a index subset) find out +-- if the dependencies within it use consistent versions of each package. +-- Return all cases where multiple packages depend on different versions of +-- some other package. +-- +-- Each element in the result is a package name along with the packages that +-- depend on it and the versions they require. These are guaranteed to be +-- distinct. +-- +dependencyInconsistencies :: PackageFixedDeps pkg + => PackageIndex pkg + -> [(PackageName, [(PackageIdentifier, Version)])] +dependencyInconsistencies index = + [ (name, inconsistencies) + | (name, uses) <- Map.toList inverseIndex + , let inconsistencies = duplicatesBy uses + versions = map snd inconsistencies + , reallyIsInconsistent name (nub versions) ] + + where inverseIndex = Map.fromListWith (++) + [ (packageName dep, [(packageId pkg, packageVersion dep)]) + | pkg <- allPackages index + , dep <- depends pkg ] + + duplicatesBy = (\groups -> if length groups == 1 + then [] + else concat groups) + . groupBy (equating snd) + . sortBy (comparing snd) + + reallyIsInconsistent :: PackageName -> [Version] -> Bool + reallyIsInconsistent _ [] = False + reallyIsInconsistent name [v1, v2] = + case (mpkg1, mpkg2) of + (Just pkg1, Just pkg2) -> pkgid1 `notElem` depends pkg2 + && pkgid2 `notElem` depends pkg1 + _ -> True + where + pkgid1 = PackageIdentifier name v1 + pkgid2 = PackageIdentifier name v2 + mpkg1 = lookupPackageId index pkgid1 + mpkg2 = lookupPackageId index pkgid2 + + reallyIsInconsistent _ _ = True + +-- | Find if there are any cycles in the dependency graph. If there are no +-- cycles the result is @[]@. +-- +-- This actually computes the strongly connected components. So it gives us a +-- list of groups of packages where within each group they all depend on each +-- other, directly or indirectly. +-- +dependencyCycles :: PackageFixedDeps pkg + => PackageIndex pkg + -> [[pkg]] +dependencyCycles index = + [ vs | Graph.CyclicSCC vs <- Graph.stronglyConnComp adjacencyList ] + where + adjacencyList = [ (pkg, packageId pkg, depends pkg) + | pkg <- allPackages index ] + +-- | Tries to take the transitive closure of the package dependencies. +-- +-- If the transitive closure is complete then it returns that subset of the +-- index. Otherwise it returns the broken packages as in 'brokenPackages'. +-- +-- * Note that if the result is @Right []@ it is because at least one of +-- the original given 'PackageIdentifier's do not occur in the index. +-- +dependencyClosure :: PackageFixedDeps pkg + => PackageIndex pkg + -> [PackageIdentifier] + -> Either (PackageIndex pkg) + [(pkg, [PackageIdentifier])] +dependencyClosure index pkgids0 = case closure mempty [] pkgids0 of + (completed, []) -> Left completed + (completed, _) -> Right (brokenPackages completed) + where + closure completed failed [] = (completed, failed) + closure completed failed (pkgid:pkgids) = case lookupPackageId index pkgid of + Nothing -> closure completed (pkgid:failed) pkgids + Just pkg -> case lookupPackageId completed (packageId pkg) of + Just _ -> closure completed failed pkgids + Nothing -> closure completed' failed pkgids' + where completed' = insert pkg completed + pkgids' = depends pkg ++ pkgids + +topologicalOrder :: PackageFixedDeps pkg => PackageIndex pkg -> [pkg] +topologicalOrder index = map toPkgId + . Graph.topSort + $ graph + where (graph, toPkgId, _) = dependencyGraph index + +reverseTopologicalOrder :: PackageFixedDeps pkg => PackageIndex pkg -> [pkg] +reverseTopologicalOrder index = map toPkgId + . Graph.topSort + . Graph.transposeG + $ graph + where (graph, toPkgId, _) = dependencyGraph index + +-- | Takes the transitive closure of the packages reverse dependencies. +-- +-- * The given 'PackageIdentifier's must be in the index. +-- +reverseDependencyClosure :: PackageFixedDeps pkg + => PackageIndex pkg + -> [PackageIdentifier] + -> [pkg] +reverseDependencyClosure index = + map vertexToPkg + . concatMap Tree.flatten + . Graph.dfs reverseDepGraph + . map (fromMaybe noSuchPkgId . pkgIdToVertex) + + where + (depGraph, vertexToPkg, pkgIdToVertex) = dependencyGraph index + reverseDepGraph = Graph.transposeG depGraph + noSuchPkgId = error "reverseDependencyClosure: package is not in the graph" + + + + +-- | Builds a graph of the package dependencies. +-- +-- Dependencies on other packages that are not in the index are discarded. +-- You can check if there are any such dependencies with 'brokenPackages'. +-- +dependencyGraph :: PackageFixedDeps pkg + => PackageIndex pkg + -> (Graph.Graph, + Graph.Vertex -> pkg, + PackageIdentifier -> Maybe Graph.Vertex) +dependencyGraph index = (graph, vertexToPkg, pkgIdToVertex) + where + graph = Array.listArray bounds $ + map (catMaybes . map pkgIdToVertex . depends) pkgs + vertexToPkg vertex = pkgTable ! vertex + pkgIdToVertex = binarySearch 0 topBound + + pkgTable = Array.listArray bounds pkgs + pkgIdTable = Array.listArray bounds (map packageId pkgs) + pkgs = sortBy (comparing packageId) (allPackages index) + topBound = length pkgs - 1 + bounds = (0, topBound) + + binarySearch a b key + | a > b = Nothing + | otherwise = case compare key (pkgIdTable ! mid) of + LT -> binarySearch a (mid-1) key + EQ -> Just mid + GT -> binarySearch (mid+1) b key + where mid = (a + b) `div` 2 -- GitLab