HackageBenchmark.hs 16.4 KB
Newer Older
1
2
3
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
4
{-# LANGUAGE ScopedTypeVariables #-}
5
6
7
8
9
10
11
12
13
14
15
16

module HackageBenchmark (
    hackageBenchmarkMain

-- Exposed for testing:
  , CabalResult(..)
  , isSignificantTimeDifference
  , combineTrialResults
  , isSignificantResult
  , shouldContinueAfterFirstTrial
  ) where

17
import Control.Concurrent.Async (concurrently)
18
19
import Control.Monad (forM, replicateM, unless, when)
import qualified Data.ByteString as BS
20
import Data.List (nub, unzip4)
21
import Data.Maybe (isJust, catMaybes)
22
23
import Data.Monoid ((<>))
import Data.String (fromString)
24
import Data.Function ((&))
25
26
27
import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
import qualified Data.Vector.Unboxed as V
import Options.Applicative
28
import Statistics.Sample (mean, stdDev, geometricMean)
29
30
31
32
import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
                                    , mannWhitneyUCriticalValue
                                    , mannWhitneyUtest)
import Statistics.Types (PValue, mkPValue)
33
34
35
import System.Directory (getTemporaryDirectory, createDirectoryIfMissing)
import System.Environment (getEnvironment)
import System.Exit (ExitCode(..), exitWith, exitFailure)
36
import System.FilePath ((</>))
37
38
39
import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
                 , stdout)
import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
40
                      , createProcess, readProcess, shell, waitForProcess, proc, readCreateProcessWithExitCode )
41
42
import Text.Printf (printf)

43
44
import qualified Data.Map.Strict as Map

45
46
47
48
49
50
51
52
53
54
55
import Distribution.Package (PackageName, mkPackageName, unPackageName)

data Args = Args {
    argCabal1                      :: FilePath
  , argCabal2                      :: FilePath
  , argCabal1Flags                 :: [String]
  , argCabal2Flags                 :: [String]
  , argPackages                    :: [PackageName]
  , argMinRunTimeDifferenceToRerun :: Double
  , argPValue                      :: PValue Double
  , argTrials                      :: Int
56
  , argConcurrently                :: Bool
57
58
59
60
61
62
63
64
65
66
67
  , argPrintTrials                 :: Bool
  , argPrintSkippedPackages        :: Bool
  , argTimeoutSeconds              :: Int
  }

data CabalTrial = CabalTrial NominalDiffTime CabalResult

data CabalResult
  = Solution
  | NoInstallPlan
  | BackjumpLimit
68
69
70
71
  | Unbuildable
  | UnbuildableDep
  | ComponentCycle
  | ModReexpIssue
72
73
74
75
76
77
78
79
80
81
82
83
84
85
  | PkgNotFound
  | Timeout
  | Unknown
  deriving (Eq, Show)

hackageBenchmarkMain :: IO ()
hackageBenchmarkMain = do
  hSetBuffering stdout LineBuffering
  args@Args {..} <- execParser parserInfo
  checkArgs args
  printConfig args
  pkgs <- getPackages args
  putStrLn ""

86
87
88
89
  let concurrently' :: IO a -> IO b -> IO (a, b)
      concurrently' | argConcurrently = concurrently
                    | otherwise       = \ma mb -> do { a <- ma; b <- mb; return (a, b) }

90
91
92
93
  let -- The maximum length of the heading and package names.
      nameColumnWidth :: Int
      nameColumnWidth =
          maximum $ map length $ "package" : map unPackageName pkgs
94
95
96
97

  -- create cabal runners
  runCabal1 <- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
  runCabal2 <- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
98
99
100
101
102

  -- When the output contains both trails and summaries, label each row as
  -- "trial" or "summary".
  when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
  putStrLn $
103
      printf "%-*s %-14s %-14s %11s %11s %11s %11s %11s"
104
105
106
             nameColumnWidth "package" "result1" "result2"
             "mean1" "mean2" "stddev1" "stddev2" "speedup"

107
  speedups :: [Double] <- fmap catMaybes $ forM pkgs $ \pkg -> do
108
109
    let printTrial msgType result1 result2 time1 time2 =
            putStrLn $
110
            printf "%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
111
112
113
114
                   msgType nameColumnWidth (unPackageName pkg)
                   (show result1) (show result2)
                   (diffTimeToDouble time1) (diffTimeToDouble time2)

115
    (CabalTrial t1 r1, CabalTrial t2 r2) <- runCabal1 pkg `concurrently'` runCabal2 pkg
116

117
118
    if not $
       shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
119
120
    then do
      when argPrintSkippedPackages $
121
122
123
124
         if argPrintTrials
         then printTrial "trial (skipping)" r1 r2 t1 t2
         else putStrLn $ printf "%-*s (first run times were too similar)"
                                nameColumnWidth (unPackageName pkg)
125
      return Nothing
126
127
128
129
    else do
      when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
      (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
                            . replicateM (argTrials - 1) $ do
130
131

        (CabalTrial t1' r1', CabalTrial t2' r2') <- runCabal1 pkg `concurrently'` runCabal2 pkg
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
        when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
        return (t1', t2', r1', r2')

      let result1 = combineTrialResults rs1
          result2 = combineTrialResults rs2
          times1 = V.fromList (map diffTimeToDouble ts1)
          times2 = V.fromList (map diffTimeToDouble ts2)
          mean1 = mean times1
          mean2 = mean times2
          stddev1 = stdDev times1
          stddev2 = stdDev times2
          speedup = mean1 / mean2

      when argPrintTrials $ putStr $ printf "%-16s " "summary"
      if isSignificantResult result1 result2
          || isSignificantTimeDifference argPValue ts1 ts2
      then putStrLn $
149
           printf "%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
150
151
152
153
                  nameColumnWidth (unPackageName pkg)
                  (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
      else when (argPrintTrials || argPrintSkippedPackages) $
           putStrLn $
154
155
156
157
158
159
160
161
           printf "%-*s (not significant, speedup = %10.3f)" nameColumnWidth (unPackageName pkg) speedup

      -- return speedup value
      return (Just speedup)

  -- finally, calculate the geometric mean of speedups
  printf "Geometric mean of %d packages' speedups is %10.3f\n" (length speedups) (geometricMean (V.fromList speedups))

162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
  where
    checkArgs :: Args -> IO ()
    checkArgs Args {..} = do
      let die msg = hPutStrLn stderr msg >> exitFailure
      unless (argTrials > 0) $ die "--trials must be greater than 0."
      unless (argMinRunTimeDifferenceToRerun >= 0) $
          die "--min-run-time-percentage-difference-to-rerun must be non-negative."
      unless (isSampleLargeEnough argPValue argTrials) $
          die "p-value is too small for the number of trials."

    printConfig :: Args -> IO ()
    printConfig Args {..} = do
      putStrLn "Comparing:"
      putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
      callProcess argCabal1 ["--version"]
      putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
      callProcess argCabal2 ["--version"]
      -- TODO: Print index state.
      putStrLn "Base package database:"
      callProcess "ghc-pkg" ["list"]

    getPackages :: Args -> IO [PackageName]
    getPackages Args {..} = do
      pkgs <-
          if null argPackages
          then do
            putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
            list <- readProcess argCabal1 ["list", "--simple-output"] ""
            return $ nub [mkPackageName $ head (words line) | line <- lines list]
          else do
            putStrLn "Using given package list ..."
            return argPackages
      putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
      return pkgs

197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
data CabalUnderTest = CabalUnderTest1 | CabalUnderTest2

runCabal
    :: Int             -- ^ timeout in seconds
    -> CabalUnderTest  -- ^ cabal under test
    -> FilePath        -- ^ cabal
    -> [String]        -- ^ flags
    -> IO (PackageName  -> IO CabalTrial)  -- ^ testing function.
runCabal timeoutSeconds cabalUnderTest cabal flags = do
  tmpDir <- getTemporaryDirectory

  -- cabal directory for this cabal under test
  let cabalDir = tmpDir </> "solver-benchmarks-workdir" </> case cabalUnderTest of
          CabalUnderTest1 -> "cabal1"
          CabalUnderTest2 -> "cabal2"

  putStrLn $ "Cabal directory (for " ++ cabal ++ ") " ++ cabalDir
  createDirectoryIfMissing True cabalDir

  -- shell enviroment
  currEnv <- Map.fromList <$>  getEnvironment
  let thisEnv :: [(String, String)]
      thisEnv = Map.toList $ currEnv
          & Map.insert "CABAL_CONFIG" (cabalDir </> "config")
          & Map.insert "CABAL_DIR"     cabalDir

  -- Run cabal update, 
  putStrLn $ "Running cabal update (using " ++ cabal ++ ") ..."
  (ec, uout, uerr) <- readCreateProcessWithExitCode (proc cabal ["update"])
      { cwd = Just cabalDir
      , env = Just thisEnv
      }
      ""
  unless (ec == ExitSuccess) $ do
      putStrLn uout
      putStrLn uerr
      exitWith ec

  -- return an actual runner
  return $ \pkg -> do
    ((exitCode, err), time) <- timeEvent $ do

      let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
          cabalCmd = unwords $
              [ cabal

              , "v2-install"

                -- These flags prevent a Cabal project or package environment from
                -- affecting the install plan.
                --
                -- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
              , "--package-env=non-existent-package-env"

                -- --lib allows solving for packages with libraries or
                -- executables.
              , "--lib"

              , unPackageName pkg

              , "--dry-run"

                -- The test doesn't currently handle stdout, so we suppress it
                -- with silent. nowrap simplifies parsing the errors messages.
              , "-vsilent+nowrap"

              ]

               ++ flags

          cmd = (shell (timeout ++ " " ++ cabalCmd))
              { std_err = CreatePipe
              , env = Just thisEnv
              , cwd = Just cabalDir
              }

      -- TODO: Read stdout and compare the install plans.
      (_, _, Just errh, ph) <- createProcess cmd
      err <- BS.hGetContents errh
      (, err) <$> waitForProcess ph

    let exhaustiveMsg =
            "After searching the rest of the dependency tree exhaustively"
        result
          | exitCode == ExitSuccess                                                          = Solution
          | exitCode == ExitFailure 124                                                      = Timeout
          | fromString exhaustiveMsg `BS.isInfixOf` err                                       = NoInstallPlan
          | fromString "Backjump limit reached" `BS.isInfixOf` err                            = BackjumpLimit
          | fromString "none of the components are available to build" `BS.isInfixOf` err     = Unbuildable
          | fromString "Dependency on unbuildable" `BS.isInfixOf` err                         = UnbuildableDep
          | fromString "Dependency cycle between the following components" `BS.isInfixOf` err = ComponentCycle
          | fromString "Problem with module re-exports" `BS.isInfixOf` err                    = ModReexpIssue
          | fromString "There is no package named" `BS.isInfixOf` err                         = PkgNotFound
          | otherwise                                                                        = Unknown
    return (CabalTrial time result)
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326

isSampleLargeEnough :: PValue Double -> Int -> Bool
isSampleLargeEnough pvalue trials =
    -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
    -- used when both sample sizes are less than or equal to 20.
    trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)

isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
isSignificantTimeDifference pvalue xs ys =
  let toVector = V.fromList . map diffTimeToDouble
  in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
       Nothing             -> error "not enough data for mannWhitneyUtest"
       Just Significant    -> True
       Just NotSignificant -> False

-- Should we stop after the first trial of this package to save time? This
-- function skips the package if the results are uninteresting and the times are
-- within --min-run-time-percentage-difference-to-rerun.
shouldContinueAfterFirstTrial :: Double
                              -> NominalDiffTime
                              -> NominalDiffTime
                              -> CabalResult
                              -> CabalResult
                              -> Bool
shouldContinueAfterFirstTrial 0                            _  _  _       _       = True
shouldContinueAfterFirstTrial _                            _  _  Timeout Timeout = False
shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1      r2      =
    isSignificantResult r1 r2
 || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)

isSignificantResult :: CabalResult -> CabalResult -> Bool
isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)

-- Is this result expected in a benchmark run on all of Hackage?
isExpectedResult :: CabalResult -> Bool
327
328
329
330
331
332
333
334
335
336
isExpectedResult Solution       = True
isExpectedResult NoInstallPlan  = True
isExpectedResult BackjumpLimit  = True
isExpectedResult Timeout        = True
isExpectedResult Unbuildable    = True
isExpectedResult UnbuildableDep = True
isExpectedResult ComponentCycle = True
isExpectedResult ModReexpIssue  = True
isExpectedResult PkgNotFound    = False
isExpectedResult Unknown        = False
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412

-- Combine CabalResults from multiple trials. Ignoring timeouts, all results
-- should be the same. If they aren't the same, we returns Unknown.
combineTrialResults :: [CabalResult] -> CabalResult
combineTrialResults rs
  | allEqual rs                          = head rs
  | allEqual [r | r <- rs, r /= Timeout] = Timeout
  | otherwise                            = Unknown
  where
    allEqual :: Eq a => [a] -> Bool
    allEqual xs = length (nub xs) == 1

timeEvent :: IO a -> IO (a, NominalDiffTime)
timeEvent task = do
  start <- getCurrentTime
  r <- task
  end <- getCurrentTime
  return (r, diffUTCTime end start)

diffTimeToDouble :: NominalDiffTime -> Double
diffTimeToDouble = fromRational . toRational

parserInfo :: ParserInfo Args
parserInfo = info (argParser <**> helper)
     ( fullDesc
    <> progDesc ("Find differences between two cabal commands when solving"
                   ++ " for all packages on Hackage.")
    <> header "hackage-benchmark" )

argParser :: Parser Args
argParser = Args
    <$> strOption
         ( long "cabal1"
        <> metavar "PATH"
        <> help "First cabal executable")
    <*> strOption
         ( long "cabal2"
        <> metavar "PATH"
        <> help "Second cabal executable")
    <*> option (words <$> str)
         ( long "cabal1-flags"
        <> value []
        <> metavar "FLAGS"
        <> help "Extra flags for the first cabal executable")
    <*> option (words <$> str)
         ( long "cabal2-flags"
        <> value []
        <> metavar "FLAGS"
        <> help "Extra flags for the second cabal executable")
    <*> option (map mkPackageName . words <$> str)
         ( long "packages"
        <> value []
        <> metavar "PACKAGES"
        <> help ("Space separated list of packages to test, or all of Hackage"
                   ++ " if unspecified"))
    <*> option auto
         ( long "min-run-time-percentage-difference-to-rerun"
        <> showDefault
        <> value 0.0
        <> metavar "PERCENTAGE"
        <> help ("Stop testing a package when the difference in run times in"
                   ++ " the first trial are within this percentage, in order to"
                   ++ " save time"))
    <*> option (mkPValue <$> auto)
         ( long "pvalue"
        <> showDefault
        <> value (mkPValue 0.05)
        <> metavar "DOUBLE"
        <> help ("p-value used to determine whether to print the results for"
                   ++ " each package"))
    <*> option auto
         ( long "trials"
        <> showDefault
        <> value 10
        <> metavar "N"
        <> help "Number of trials for each package")
413
414
415
    <*> switch
         ( long "concurrently"
        <> help "Run cabals concurrently")
416
417
418
419
420
421
422
423
424
425
426
427
    <*> switch
         ( long "print-trials"
        <> help "Whether to include the results from individual trials in the output")
    <*> switch
         ( long "print-skipped-packages"
        <> help "Whether to include skipped packages in the output")
    <*> option auto
         ( long "timeout"
        <> showDefault
        <> value 90
        <> metavar "SECONDS"
        <> help "Maximum time to run a cabal command, in seconds")