-
Phil de Joux authored
- Satisfy -Wmissing-signatures in test-runtime-deps - Satisfy -Wx-partial in HackageBenchmark - Satisfy -Wunused-imports in QuickCheck.Instances.Cabal - Use partial pattern for filtering in list comprehension - Don't error on deprecated import
Phil de Joux authored- Satisfy -Wmissing-signatures in test-runtime-deps - Satisfy -Wx-partial in HackageBenchmark - Satisfy -Wunused-imports in QuickCheck.Instances.Cabal - Use partial pattern for filtering in list comprehension - Don't error on deprecated import
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
HackageBenchmark.hs 16.59 KiB
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ScopedTypeVariables #-}
module HackageBenchmark (
hackageBenchmarkMain
-- Exposed for testing:
, CabalResult(..)
, isSignificantTimeDifference
, combineTrialResults
, isSignificantResult
, shouldContinueAfterFirstTrial
) where
import Control.Concurrent.Async (concurrently)
import Control.Monad (forM, replicateM, unless, when)
import qualified Data.ByteString as BS
import Data.List (nub, unzip4)
import Data.Maybe (isJust, catMaybes)
import Data.String (fromString)
import Data.Function ((&))
import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
import qualified Data.Vector.Unboxed as V
import Options.Applicative
import Statistics.Sample (mean, stdDev, geometricMean)
import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
, mannWhitneyUCriticalValue
, mannWhitneyUtest)
import Statistics.Types (PValue, mkPValue)
import System.Directory (getTemporaryDirectory, createDirectoryIfMissing)
import System.Environment (getEnvironment)
import System.Exit (ExitCode(..), exitWith, exitFailure)
import System.FilePath ((</>))
import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
, stdout)
import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
, createProcess, readProcess, shell, waitForProcess, proc, readCreateProcessWithExitCode )
import Text.Printf (printf)
import qualified Data.Map.Strict as Map
import Distribution.Package (PackageName, mkPackageName, unPackageName)
data Args = Args {
argCabal1 :: FilePath
, argCabal2 :: FilePath
, argCabal1Flags :: [String]
, argCabal2Flags :: [String]
, argPackages :: [PackageName]
, argMinRunTimeDifferenceToRerun :: Double
, argPValue :: PValue Double
, argTrials :: Int
, argConcurrently :: Bool
, argPrintTrials :: Bool
, argPrintSkippedPackages :: Bool
, argTimeoutSeconds :: Int
}
data CabalTrial = CabalTrial NominalDiffTime CabalResult
data CabalResult
= Solution
| NoInstallPlan
| BackjumpLimit
| Unbuildable
| UnbuildableDep
| ComponentCycle
| ModReexpIssue
| PkgNotFound
| Timeout
| Unknown
deriving (Eq, Show)
hackageBenchmarkMain :: IO ()
hackageBenchmarkMain = do
hSetBuffering stdout LineBuffering
args@Args {..} <- execParser parserInfo
checkArgs args
printConfig args
pkgs <- getPackages args
putStrLn ""
let concurrently' :: IO a -> IO b -> IO (a, b)
concurrently' | argConcurrently = concurrently
| otherwise = \ma mb -> do { a <- ma; b <- mb; return (a, b) }
let -- The maximum length of the heading and package names.
nameColumnWidth :: Int
nameColumnWidth =
maximum $ map length $ "package" : map unPackageName pkgs
-- create cabal runners
runCabal1 <- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
runCabal2 <- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
-- When the output contains both trails and summaries, label each row as
-- "trial" or "summary".
when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
putStrLn $
printf "%-*s %-14s %-14s %11s %11s %11s %11s %11s"
nameColumnWidth "package" "result1" "result2"
"mean1" "mean2" "stddev1" "stddev2" "speedup"
speedups :: [Double] <- fmap catMaybes $ forM pkgs $ \pkg -> do
let printTrial msgType result1 result2 time1 time2 =
putStrLn $
printf "%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
msgType nameColumnWidth (unPackageName pkg)
(show result1) (show result2)
(diffTimeToDouble time1) (diffTimeToDouble time2)
(CabalTrial t1 r1, CabalTrial t2 r2) <- runCabal1 pkg `concurrently'` runCabal2 pkg
if not $
shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
then do
when argPrintSkippedPackages $
if argPrintTrials
then printTrial "trial (skipping)" r1 r2 t1 t2
else putStrLn $ printf "%-*s (first run times were too similar)"
nameColumnWidth (unPackageName pkg)
return Nothing
else do
when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
(ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
. replicateM (argTrials - 1) $ do
(CabalTrial t1' r1', CabalTrial t2' r2') <- runCabal1 pkg `concurrently'` runCabal2 pkg
when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
return (t1', t2', r1', r2')
let result1 = combineTrialResults rs1
result2 = combineTrialResults rs2
times1 = V.fromList (map diffTimeToDouble ts1)
times2 = V.fromList (map diffTimeToDouble ts2)
mean1 = mean times1
mean2 = mean times2
stddev1 = stdDev times1
stddev2 = stdDev times2
speedup = mean1 / mean2
when argPrintTrials $ putStr $ printf "%-16s " "summary"
if isSignificantResult result1 result2
|| isSignificantTimeDifference argPValue ts1 ts2
then putStrLn $
printf "%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
nameColumnWidth (unPackageName pkg)
(show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
else when (argPrintTrials || argPrintSkippedPackages) $
putStrLn $
printf "%-*s (not significant, speedup = %10.3f)" nameColumnWidth (unPackageName pkg) speedup
-- return speedup value
return (Just speedup)
-- finally, calculate the geometric mean of speedups
printf "Geometric mean of %d packages' speedups is %10.3f\n" (length speedups) (geometricMean (V.fromList speedups))
where
checkArgs :: Args -> IO ()
checkArgs Args {..} = do
let die msg = hPutStrLn stderr msg >> exitFailure
unless (argTrials > 0) $ die "--trials must be greater than 0."
unless (argMinRunTimeDifferenceToRerun >= 0) $
die "--min-run-time-percentage-difference-to-rerun must be non-negative."
unless (isSampleLargeEnough argPValue argTrials) $
die "p-value is too small for the number of trials."
printConfig :: Args -> IO ()
printConfig Args {..} = do
putStrLn "Comparing:"
putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
callProcess argCabal1 ["--version"]
putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
callProcess argCabal2 ["--version"]
-- TODO: Print index state.
putStrLn "Base package database:"
callProcess "ghc-pkg" ["list"]
getPackages :: Args -> IO [PackageName]
getPackages Args {..} = do
pkgs <-
if null argPackages
then do
putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
list <- readProcess argCabal1 ["list", "--simple-output"] ""
return $ nub [mkPackageName n | n : _ <- words <$> lines list]
else do
putStrLn "Using given package list ..."
return argPackages
putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
return pkgs
data CabalUnderTest = CabalUnderTest1 | CabalUnderTest2
runCabal
:: Int -- ^ timeout in seconds
-> CabalUnderTest -- ^ cabal under test
-> FilePath -- ^ cabal
-> [String] -- ^ flags
-> IO (PackageName -> IO CabalTrial) -- ^ testing function.
runCabal timeoutSeconds cabalUnderTest cabal flags = do
tmpDir <- getTemporaryDirectory
-- cabal directory for this cabal under test
let cabalDir = tmpDir </> "solver-benchmarks-workdir" </> case cabalUnderTest of
CabalUnderTest1 -> "cabal1"
CabalUnderTest2 -> "cabal2"
putStrLn $ "Cabal directory (for " ++ cabal ++ ") " ++ cabalDir
createDirectoryIfMissing True cabalDir
-- shell environment
currEnv <- Map.fromList <$> getEnvironment
let thisEnv :: [(String, String)]
thisEnv = Map.toList $ currEnv
& Map.insert "CABAL_CONFIG" (cabalDir </> "config")
& Map.insert "CABAL_DIR" cabalDir
-- Initialize the config file, whether or not it already exists
runCabalCmdWithEnv cabalDir thisEnv ["user-config", "init", "--force"]
-- Run cabal update
putStrLn $ "Running cabal update (using " ++ cabal ++ ") ..."
runCabalCmdWithEnv cabalDir thisEnv ["update"]
-- return an actual runner
return $ \pkg -> do
((exitCode, err), time) <- timeEvent $ do
let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
cabalCmd = unwords $
[ cabal
, "install"
-- These flags prevent a Cabal project or package environment from
-- affecting the install plan.
--
-- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
, "--package-env=non-existent-package-env"
-- --lib allows solving for packages with libraries or
-- executables.
, "--lib"
, unPackageName pkg
, "--dry-run"
-- The test doesn't currently handle stdout, so we suppress it
-- with silent. nowrap simplifies parsing the errors messages.
, "-vsilent+nowrap"
]
++ flags
cmd = (shell (timeout ++ " " ++ cabalCmd))
{ std_err = CreatePipe
, env = Just thisEnv
, cwd = Just cabalDir
}
-- TODO: Read stdout and compare the install plans.
(_, _, Just errh, ph) <- createProcess cmd
err <- BS.hGetContents errh
(, err) <$> waitForProcess ph
let exhaustiveMsg =
"After searching the rest of the dependency tree exhaustively"
result
| exitCode == ExitSuccess = Solution
| exitCode == ExitFailure 124 = Timeout
| fromString exhaustiveMsg `BS.isInfixOf` err = NoInstallPlan
| fromString "Backjump limit reached" `BS.isInfixOf` err = BackjumpLimit
| fromString "none of the components are available to build" `BS.isInfixOf` err = Unbuildable
| fromString "Dependency on unbuildable" `BS.isInfixOf` err = UnbuildableDep
| fromString "Dependency cycle between the following components" `BS.isInfixOf` err = ComponentCycle
| fromString "Problem with module re-exports" `BS.isInfixOf` err = ModReexpIssue
| fromString "There is no package named" `BS.isInfixOf` err = PkgNotFound
| otherwise = Unknown
return (CabalTrial time result)
where
runCabalCmdWithEnv cabalDir thisEnv args = do
(ec, uout, uerr) <- readCreateProcessWithExitCode (proc cabal args)
{ cwd = Just cabalDir
, env = Just thisEnv
}
""
unless (ec == ExitSuccess) $ do
putStrLn uout
putStrLn uerr
exitWith ec
isSampleLargeEnough :: PValue Double -> Int -> Bool
isSampleLargeEnough pvalue trials =
-- mannWhitneyUCriticalValue, which can fail with too few samples, is only
-- used when both sample sizes are less than or equal to 20.
trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
isSignificantTimeDifference pvalue xs ys =
let toVector = V.fromList . map diffTimeToDouble
in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
Nothing -> error "not enough data for mannWhitneyUtest"
Just Significant -> True
Just NotSignificant -> False
-- Should we stop after the first trial of this package to save time? This
-- function skips the package if the results are uninteresting and the times are
-- within --min-run-time-percentage-difference-to-rerun.
shouldContinueAfterFirstTrial :: Double
-> NominalDiffTime
-> NominalDiffTime
-> CabalResult
-> CabalResult
-> Bool
shouldContinueAfterFirstTrial 0 _ _ _ _ = True
shouldContinueAfterFirstTrial _ _ _ Timeout Timeout = False
shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2 =
isSignificantResult r1 r2
|| abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
isSignificantResult :: CabalResult -> CabalResult -> Bool
isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
-- Is this result expected in a benchmark run on all of Hackage?
isExpectedResult :: CabalResult -> Bool
isExpectedResult Solution = True
isExpectedResult NoInstallPlan = True
isExpectedResult BackjumpLimit = True
isExpectedResult Timeout = True
isExpectedResult Unbuildable = True
isExpectedResult UnbuildableDep = True
isExpectedResult ComponentCycle = True
isExpectedResult ModReexpIssue = True
isExpectedResult PkgNotFound = False
isExpectedResult Unknown = False
-- Combine CabalResults from multiple trials. Ignoring timeouts, all results
-- should be the same. If they aren't the same, we returns Unknown.
combineTrialResults :: [CabalResult] -> CabalResult
combineTrialResults rs
| r:_ <- rs
, allEqual rs = r
| allEqual [r | r <- rs, r /= Timeout] = Timeout
| otherwise = Unknown
where
allEqual :: Eq a => [a] -> Bool
allEqual xs = length (nub xs) == 1
timeEvent :: IO a -> IO (a, NominalDiffTime)
timeEvent task = do
start <- getCurrentTime
r <- task
end <- getCurrentTime
return (r, diffUTCTime end start)
diffTimeToDouble :: NominalDiffTime -> Double
diffTimeToDouble = fromRational . toRational
parserInfo :: ParserInfo Args
parserInfo = info (argParser <**> helper)
( fullDesc
<> progDesc ("Find differences between two cabal commands when solving"
++ " for all packages on Hackage.")
<> header "hackage-benchmark" )
argParser :: Parser Args
argParser = Args
<$> strOption
( long "cabal1"
<> metavar "PATH"
<> help "First cabal executable")
<*> strOption
( long "cabal2"
<> metavar "PATH"
<> help "Second cabal executable")
<*> option (words <$> str)
( long "cabal1-flags"
<> value []
<> metavar "FLAGS"
<> help "Extra flags for the first cabal executable")
<*> option (words <$> str)
( long "cabal2-flags"
<> value []
<> metavar "FLAGS"
<> help "Extra flags for the second cabal executable")
<*> option (map mkPackageName . words <$> str)
( long "packages"
<> value []
<> metavar "PACKAGES"
<> help ("Space separated list of packages to test, or all of Hackage"
++ " if unspecified"))
<*> option auto
( long "min-run-time-percentage-difference-to-rerun"
<> showDefault
<> value 0.0
<> metavar "PERCENTAGE"
<> help ("Stop testing a package when the difference in run times in"
++ " the first trial are within this percentage, in order to"
++ " save time"))
<*> option (mkPValue <$> auto)
( long "pvalue"
<> showDefault
<> value (mkPValue 0.05)
<> metavar "DOUBLE"
<> help ("p-value used to determine whether to print the results for"
++ " each package"))
<*> option auto
( long "trials"
<> showDefault
<> value 10
<> metavar "N"
<> help "Number of trials for each package")
<*> switch
( long "concurrently"
<> help "Run cabals concurrently")
<*> switch
( long "print-trials"
<> help "Whether to include the results from individual trials in the output")
<*> switch
( long "print-skipped-packages"
<> help "Whether to include skipped packages in the output")
<*> option auto
( long "timeout"
<> showDefault
<> value 90
<> metavar "SECONDS"
<> help "Maximum time to run a cabal command, in seconds")