Commit 267efc85 authored by kristenk's avatar kristenk Committed by GitHub
Browse files

Merge pull request #4674 from grayjay/solver-hackage-benchmark

Start adding a benchmark that solves for all packages on Hackage.
parents 5377e101 384bb92f
......@@ -47,7 +47,7 @@ matrix:
- env: GHCVER=7.10.3 SCRIPT=script USE_GOLD=YES
os: linux
sudo: required
- env: GHCVER=8.0.2 SCRIPT=script DEPLOY_DOCS=YES USE_GOLD=YES
- env: GHCVER=8.0.2 SCRIPT=script DEPLOY_DOCS=YES USE_GOLD=YES TEST_SOLVER_BENCHMARKS=YES
sudo: required
os: linux
......
packages: Cabal/ cabal-testsuite/ cabal-install/
packages: Cabal/ cabal-testsuite/ cabal-install/ solver-benchmarks/
constraints: unix >= 2.7.1.0,
cabal-install +lib +monolithic
......
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
module HackageBenchmark (
hackageBenchmarkMain
-- Exposed for testing:
, CabalResult(..)
, isSignificantTimeDifference
, combineTrialResults
, isSignificantResult
, shouldContinueAfterFirstTrial
) where
import Control.Monad (forM_, replicateM, unless, when)
import qualified Data.ByteString as B
import Data.List (nub, unzip4)
import Data.Maybe (isJust)
import Data.Monoid ((<>))
import Data.String (fromString)
import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
import qualified Data.Vector.Unboxed as V
import Options.Applicative
import Statistics.Sample (mean, stdDev)
import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
, mannWhitneyUCriticalValue
, mannWhitneyUtest)
import Statistics.Types (PValue, mkPValue)
import System.Exit (ExitCode(..), exitFailure)
import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
, stdout)
import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
, createProcess, readProcess, shell, waitForProcess )
import Text.Printf (printf)
import Distribution.Package (PackageName, mkPackageName, unPackageName)
data Args = Args {
argCabal1 :: FilePath
, argCabal2 :: FilePath
, argCabal1Flags :: [String]
, argCabal2Flags :: [String]
, argPackages :: [PackageName]
, argMinRunTimeDifferenceToRerun :: Double
, argPValue :: PValue Double
, argTrials :: Int
, argPrintTrials :: Bool
, argPrintSkippedPackages :: Bool
, argTimeoutSeconds :: Int
}
data CabalTrial = CabalTrial NominalDiffTime CabalResult
data CabalResult
= Solution
| NoInstallPlan
| BackjumpLimit
| PkgNotFound
| Timeout
| Unknown
deriving (Eq, Show)
hackageBenchmarkMain :: IO ()
hackageBenchmarkMain = do
hSetBuffering stdout LineBuffering
args@Args {..} <- execParser parserInfo
checkArgs args
printConfig args
pkgs <- getPackages args
putStrLn ""
let -- The maximum length of the heading and package names.
nameColumnWidth :: Int
nameColumnWidth =
maximum $ map length $ "package" : map unPackageName pkgs
runCabal1 = runCabal argTimeoutSeconds argCabal1 argCabal1Flags
runCabal2 = runCabal argTimeoutSeconds argCabal2 argCabal2Flags
-- When the output contains both trails and summaries, label each row as
-- "trial" or "summary".
when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
putStrLn $
printf "%-*s %-13s %-13s %11s %11s %11s %11s %11s"
nameColumnWidth "package" "result1" "result2"
"mean1" "mean2" "stddev1" "stddev2" "speedup"
forM_ pkgs $ \pkg -> do
let printTrial msgType result1 result2 time1 time2 =
putStrLn $
printf "%-16s %-*s %-13s %-13s %10.3fs %10.3fs"
msgType nameColumnWidth (unPackageName pkg)
(show result1) (show result2)
(diffTimeToDouble time1) (diffTimeToDouble time2)
CabalTrial t1 r1 <- runCabal1 pkg
CabalTrial t2 r2 <- runCabal2 pkg
if not $
shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
then when argPrintSkippedPackages $
if argPrintTrials
then printTrial "trial (skipping)" r1 r2 t1 t2
else putStrLn $ printf "%-*s (first run times were too similar)"
nameColumnWidth (unPackageName pkg)
else do
when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
(ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
. replicateM (argTrials - 1) $ do
CabalTrial t1' r1' <- runCabal1 pkg
CabalTrial t2' r2' <- runCabal2 pkg
when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
return (t1', t2', r1', r2')
let result1 = combineTrialResults rs1
result2 = combineTrialResults rs2
times1 = V.fromList (map diffTimeToDouble ts1)
times2 = V.fromList (map diffTimeToDouble ts2)
mean1 = mean times1
mean2 = mean times2
stddev1 = stdDev times1
stddev2 = stdDev times2
speedup = mean1 / mean2
when argPrintTrials $ putStr $ printf "%-16s " "summary"
if isSignificantResult result1 result2
|| isSignificantTimeDifference argPValue ts1 ts2
then putStrLn $
printf "%-*s %-13s %-13s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
nameColumnWidth (unPackageName pkg)
(show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
else when (argPrintTrials || argPrintSkippedPackages) $
putStrLn $
printf "%-*s (not significant)" nameColumnWidth (unPackageName pkg)
where
checkArgs :: Args -> IO ()
checkArgs Args {..} = do
let die msg = hPutStrLn stderr msg >> exitFailure
unless (argTrials > 0) $ die "--trials must be greater than 0."
unless (argMinRunTimeDifferenceToRerun >= 0) $
die "--min-run-time-percentage-difference-to-rerun must be non-negative."
unless (isSampleLargeEnough argPValue argTrials) $
die "p-value is too small for the number of trials."
printConfig :: Args -> IO ()
printConfig Args {..} = do
putStrLn "Comparing:"
putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
callProcess argCabal1 ["--version"]
putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
callProcess argCabal2 ["--version"]
-- TODO: Print index state.
putStrLn "Base package database:"
callProcess "ghc-pkg" ["list"]
getPackages :: Args -> IO [PackageName]
getPackages Args {..} = do
pkgs <-
if null argPackages
then do
putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
list <- readProcess argCabal1 ["list", "--simple-output"] ""
return $ nub [mkPackageName $ head (words line) | line <- lines list]
else do
putStrLn "Using given package list ..."
return argPackages
putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
return pkgs
runCabal :: Int -> FilePath -> [String] -> PackageName -> IO CabalTrial
runCabal timeoutSeconds cabal flags pkg = do
((exitCode, err), time) <- timeEvent $ do
let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
cabalCmd =
unwords $
[cabal, "install", unPackageName pkg, "--dry-run", "-v0"] ++ flags
cmd = (shell (timeout ++ " " ++ cabalCmd)) { std_err = CreatePipe }
-- TODO: Read stdout and compare the install plans.
(_, _, Just errh, ph) <- createProcess cmd
err <- B.hGetContents errh
(, err) <$> waitForProcess ph
let exhaustiveMsg =
"After searching the rest of the dependency tree exhaustively"
result
| exitCode == ExitSuccess = Solution
| exitCode == ExitFailure 124 = Timeout
| fromString exhaustiveMsg `B.isInfixOf` err = NoInstallPlan
| fromString "Backjump limit reached" `B.isInfixOf` err = BackjumpLimit
| fromString "There is no package named" `B.isInfixOf` err = PkgNotFound
| otherwise = Unknown
return (CabalTrial time result)
isSampleLargeEnough :: PValue Double -> Int -> Bool
isSampleLargeEnough pvalue trials =
-- mannWhitneyUCriticalValue, which can fail with too few samples, is only
-- used when both sample sizes are less than or equal to 20.
trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
isSignificantTimeDifference pvalue xs ys =
let toVector = V.fromList . map diffTimeToDouble
in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
Nothing -> error "not enough data for mannWhitneyUtest"
Just Significant -> True
Just NotSignificant -> False
-- Should we stop after the first trial of this package to save time? This
-- function skips the package if the results are uninteresting and the times are
-- within --min-run-time-percentage-difference-to-rerun.
shouldContinueAfterFirstTrial :: Double
-> NominalDiffTime
-> NominalDiffTime
-> CabalResult
-> CabalResult
-> Bool
shouldContinueAfterFirstTrial 0 _ _ _ _ = True
shouldContinueAfterFirstTrial _ _ _ Timeout Timeout = False
shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2 =
isSignificantResult r1 r2
|| abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
isSignificantResult :: CabalResult -> CabalResult -> Bool
isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
-- Is this result expected in a benchmark run on all of Hackage?
isExpectedResult :: CabalResult -> Bool
isExpectedResult Solution = True
isExpectedResult NoInstallPlan = True
isExpectedResult BackjumpLimit = True
isExpectedResult Timeout = True
isExpectedResult PkgNotFound = False
isExpectedResult Unknown = False
-- Combine CabalResults from multiple trials. Ignoring timeouts, all results
-- should be the same. If they aren't the same, we returns Unknown.
combineTrialResults :: [CabalResult] -> CabalResult
combineTrialResults rs
| allEqual rs = head rs
| allEqual [r | r <- rs, r /= Timeout] = Timeout
| otherwise = Unknown
where
allEqual :: Eq a => [a] -> Bool
allEqual xs = length (nub xs) == 1
timeEvent :: IO a -> IO (a, NominalDiffTime)
timeEvent task = do
start <- getCurrentTime
r <- task
end <- getCurrentTime
return (r, diffUTCTime end start)
diffTimeToDouble :: NominalDiffTime -> Double
diffTimeToDouble = fromRational . toRational
parserInfo :: ParserInfo Args
parserInfo = info (argParser <**> helper)
( fullDesc
<> progDesc ("Find differences between two cabal commands when solving"
++ " for all packages on Hackage.")
<> header "hackage-benchmark" )
argParser :: Parser Args
argParser = Args
<$> strOption
( long "cabal1"
<> metavar "PATH"
<> help "First cabal executable")
<*> strOption
( long "cabal2"
<> metavar "PATH"
<> help "Second cabal executable")
<*> option (words <$> str)
( long "cabal1-flags"
<> value []
<> metavar "FLAGS"
<> help "Extra flags for the first cabal executable")
<*> option (words <$> str)
( long "cabal2-flags"
<> value []
<> metavar "FLAGS"
<> help "Extra flags for the second cabal executable")
<*> option (map mkPackageName . words <$> str)
( long "packages"
<> value []
<> metavar "PACKAGES"
<> help ("Space separated list of packages to test, or all of Hackage"
++ " if unspecified"))
<*> option auto
( long "min-run-time-percentage-difference-to-rerun"
<> showDefault
<> value 0.0
<> metavar "PERCENTAGE"
<> help ("Stop testing a package when the difference in run times in"
++ " the first trial are within this percentage, in order to"
++ " save time"))
<*> option (mkPValue <$> auto)
( long "pvalue"
<> showDefault
<> value (mkPValue 0.05)
<> metavar "DOUBLE"
<> help ("p-value used to determine whether to print the results for"
++ " each package"))
<*> option auto
( long "trials"
<> showDefault
<> value 10
<> metavar "N"
<> help "Number of trials for each package")
<*> switch
( long "print-trials"
<> help "Whether to include the results from individual trials in the output")
<*> switch
( long "print-skipped-packages"
<> help "Whether to include skipped packages in the output")
<*> option auto
( long "timeout"
<> showDefault
<> value 90
<> metavar "SECONDS"
<> help "Maximum time to run a cabal command, in seconds")
Copyright (c) 2003-2017, Cabal Development Team.
See the AUTHORS file for the full list of copyright holders.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Isaac Jones nor the names of other
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Dependency Solver Benchmarks
============================
hackage-benchmark
-----------------
The goal of this benchmark is to find examples of packages that show a
difference in behavior between two versions of cabal. It doesn't try
to determine which version of cabal performs better.
`hackage-benchmark` compares two `cabal` commands by running each one
on each package in a list. The list is either the package index or a
list of packages provided on the command line. In order to save time,
the benchmark initially only runs one trial for each package. If the
results (solution, no solution, timeout, etc.) are the same and the
times are too similar, it skips the package. Otherwise, it runs more
trials and prints the results if they are significant.
import HackageBenchmark
main :: IO ()
main = hackageBenchmarkMain
name: solver-benchmarks
version: 2.1.0.0
copyright: 2003-2017, Cabal Development Team (see AUTHORS file)
license: BSD3
license-file: LICENSE
author: Cabal Development Team <cabal-devel@haskell.org>
maintainer: cabal-devel@haskell.org
homepage: http://www.haskell.org/cabal/
bug-reports: https://github.com/haskell/cabal/issues
synopsis: Benchmarks for the cabal dependency solver
description:
This package contains benchmarks that test cabal's dependency solver by running the cabal executable.
category: Distribution
cabal-version: >=1.10
build-type: Simple
extra-source-files:
README.md
source-repository head
type: git
location: https://github.com/haskell/cabal/
subdir: solver-benchmarks
library
ghc-options: -Wall -fwarn-tabs
exposed-modules:
HackageBenchmark
build-depends:
base,
bytestring,
Cabal >= 2.1,
optparse-applicative,
process,
time,
statistics >= 0.14 && < 0.15,
vector
default-language: Haskell2010
executable hackage-benchmark
main-is: hackage-benchmark.hs
hs-source-dirs: main
ghc-options: -threaded -Wall -fwarn-tabs
build-depends:
base,
solver-benchmarks
default-language: Haskell2010
test-suite unit-tests
type: exitcode-stdio-1.0
main-is: HackageBenchmarkTest.hs
hs-source-dirs: tests
ghc-options: -threaded -Wall -fwarn-tabs
build-depends:
base,
solver-benchmarks,
statistics >= 0.14 && < 0.15,
tasty,
tasty-hunit
default-language: Haskell2010
import HackageBenchmark
import Statistics.Types (mkPValue)
import Test.Tasty (TestTree, defaultMain, testGroup)
import Test.Tasty.HUnit (assert, testCase, (@?=))
main :: IO ()
main = defaultMain tests
tests :: TestTree
tests = testGroup "unit tests" [
testGroup "isSignificantTimeDifference" [
testCase "detect increase in distribution" $ assert $
isSignificantTimeDifference (mkPValue 0.05) [1,2..7] [4,5..10]
, testCase "detect decrease in distribution" $ assert $
isSignificantTimeDifference (mkPValue 0.05) [1,2..7] [-2,-1..4]
, testCase "ignore same data" $ assert $
not $ isSignificantTimeDifference (mkPValue 0.05) [1,2..10] [1,2..10]
, testCase "same data with high p-value is significant" $ assert $
isSignificantTimeDifference (mkPValue 0.9) [1,2..10] [1,2..10]
, testCase "ignore outlier" $ assert $
not $ isSignificantTimeDifference (mkPValue 0.05) [1, 2, 1, 1, 1] [2, 1, 50, 1, 1]
]
, testGroup "combineTrialResults" [
testCase "convert unexpected difference to Unknown" $
combineTrialResults [NoInstallPlan, BackjumpLimit] @?= Unknown
, testCase "return one of identical errors" $
combineTrialResults [NoInstallPlan, NoInstallPlan] @?= NoInstallPlan
, testCase "return one of identical successes" $
combineTrialResults [Solution, Solution] @?= Solution
, testCase "timeout overrides other results" $
combineTrialResults [Solution, Timeout, Solution] @?= Timeout
, testCase "convert unexpected difference to Unknown, even with timeout" $
combineTrialResults [Solution, Timeout, NoInstallPlan] @?= Unknown
]
, testGroup "isSignificantResult" [
testCase "different results are significant" $ assert $
isSignificantResult NoInstallPlan BackjumpLimit
, testCase "unknown result is significant" $ assert $
isSignificantResult Unknown Unknown
, testCase "PkgNotFound is significant" $ assert $
isSignificantResult PkgNotFound PkgNotFound
, testCase "same expected error is not significant" $ assert $
not $ isSignificantResult NoInstallPlan NoInstallPlan
, testCase "success is not significant" $ assert $
not $ isSignificantResult Solution Solution
]
, testGroup "shouldContinueAfterFirstTrial" [
testCase "rerun when min difference is zero" $ assert $
shouldContinueAfterFirstTrial 0 1.0 1.0 Solution Solution
, testCase "rerun when min difference is zero, even with timeout" $ assert $
shouldContinueAfterFirstTrial 0 1.0 1.0 Timeout Timeout
, testCase "treat timeouts as the same time" $ assert $
not $ shouldContinueAfterFirstTrial 0.000001 89.9 92.0 Timeout Timeout
, testCase "skip when times are too close - 1" $ assert $
not $ shouldContinueAfterFirstTrial 10 1.0 0.91 Solution Solution
, testCase "skip when times are too close - 2" $ assert $
not $ shouldContinueAfterFirstTrial 10 1.0 1.09 Solution Solution
, testCase "rerun when times aren't too close - 1" $ assert $
shouldContinueAfterFirstTrial 10 1.0 0.905 Solution Solution
, testCase "rerun when times aren't too close - 2" $ assert $
shouldContinueAfterFirstTrial 10 1.0 1.1 Solution Solution
]
]
......@@ -15,6 +15,7 @@ CABAL_BDIR="${TRAVIS_BUILD_DIR}/dist-newstyle/build/$ARCH/ghc-$GHCVER/Cabal-${CA
CABAL_TESTSUITE_BDIR="${TRAVIS_BUILD_DIR}/dist-newstyle/build/$ARCH/ghc-$GHCVER/cabal-testsuite-${CABAL_VERSION}"
CABAL_INSTALL_BDIR="${TRAVIS_BUILD_DIR}/dist-newstyle/build/$ARCH/ghc-$GHCVER/cabal-install-${CABAL_VERSION}"
CABAL_INSTALL_SETUP="${CABAL_INSTALL_BDIR}/setup/setup"
SOLVER_BENCHMARKS_BDIR="${TRAVIS_BUILD_DIR}/dist-newstyle/build/$ARCH/ghc-$GHCVER/solver-benchmarks-${CABAL_VERSION}"
HACKAGE_REPO_TOOL_BDIR="${TRAVIS_BUILD_DIR}/dist-newstyle/build/$ARCH/ghc-$GHCVER/hackage-repo-tool-${HACKAGE_REPO_TOOL_VERSION}/c/hackage-repo-tool"
# ---------------------------------------------------------------------
......
......@@ -167,6 +167,11 @@ timed ${CABAL_INSTALL_BDIR}/build/cabal/cabal update
(cd cabal-install && timed cabal check) || exit $?
if [ "x$TEST_SOLVER_BENCHMARKS" = "xYES" ]; then
timed cabal new-build $jobs solver-benchmarks:hackage-benchmark solver-benchmarks:unit-tests
timed ${SOLVER_BENCHMARKS_BDIR}/c/unit-tests/build/unit-tests/unit-tests $TEST_OPTIONS
fi
unset CABAL_BUILDDIR
# Check what we got
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment