Merge pull request #9898 from haskell/mergify/bp/3.12/pr-9865
[cabal.git] / solver-benchmarks / HackageBenchmark.hs
blobd70c25feebc11ccbb768de138076c92afe9b04a7
1 {-# OPTIONS_GHC -fno-warn-orphans #-}
2 {-# LANGUAGE RecordWildCards #-}
3 {-# LANGUAGE TupleSections #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
6 module HackageBenchmark (
7 hackageBenchmarkMain
9 -- Exposed for testing:
10 , CabalResult(..)
11 , isSignificantTimeDifference
12 , combineTrialResults
13 , isSignificantResult
14 , shouldContinueAfterFirstTrial
15 ) where
17 import Control.Concurrent.Async (concurrently)
18 import Control.Monad (forM, replicateM, unless, when)
19 import qualified Data.ByteString as BS
20 import Data.List (nub, unzip4)
21 import Data.Maybe (isJust, catMaybes)
22 import Data.Monoid ((<>))
23 import Data.String (fromString)
24 import Data.Function ((&))
25 import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
26 import qualified Data.Vector.Unboxed as V
27 import Options.Applicative
28 import Statistics.Sample (mean, stdDev, geometricMean)
29 import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
30 , mannWhitneyUCriticalValue
31 , mannWhitneyUtest)
32 import Statistics.Types (PValue, mkPValue)
33 import System.Directory (getTemporaryDirectory, createDirectoryIfMissing)
34 import System.Environment (getEnvironment)
35 import System.Exit (ExitCode(..), exitWith, exitFailure)
36 import System.FilePath ((</>))
37 import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
38 , stdout)
39 import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
40 , createProcess, readProcess, shell, waitForProcess, proc, readCreateProcessWithExitCode )
41 import Text.Printf (printf)
43 import qualified Data.Map.Strict as Map
45 import Distribution.Package (PackageName, mkPackageName, unPackageName)
47 data Args = Args {
48 argCabal1 :: FilePath
49 , argCabal2 :: FilePath
50 , argCabal1Flags :: [String]
51 , argCabal2Flags :: [String]
52 , argPackages :: [PackageName]
53 , argMinRunTimeDifferenceToRerun :: Double
54 , argPValue :: PValue Double
55 , argTrials :: Int
56 , argConcurrently :: Bool
57 , argPrintTrials :: Bool
58 , argPrintSkippedPackages :: Bool
59 , argTimeoutSeconds :: Int
62 data CabalTrial = CabalTrial NominalDiffTime CabalResult
64 data CabalResult
65 = Solution
66 | NoInstallPlan
67 | BackjumpLimit
68 | Unbuildable
69 | UnbuildableDep
70 | ComponentCycle
71 | ModReexpIssue
72 | PkgNotFound
73 | Timeout
74 | Unknown
75 deriving (Eq, Show)
77 hackageBenchmarkMain :: IO ()
78 hackageBenchmarkMain = do
79 hSetBuffering stdout LineBuffering
80 args@Args {..} <- execParser parserInfo
81 checkArgs args
82 printConfig args
83 pkgs <- getPackages args
84 putStrLn ""
86 let concurrently' :: IO a -> IO b -> IO (a, b)
87 concurrently' | argConcurrently = concurrently
88 | otherwise = \ma mb -> do { a <- ma; b <- mb; return (a, b) }
90 let -- The maximum length of the heading and package names.
91 nameColumnWidth :: Int
92 nameColumnWidth =
93 maximum $ map length $ "package" : map unPackageName pkgs
95 -- create cabal runners
96 runCabal1 <- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
97 runCabal2 <- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
99 -- When the output contains both trails and summaries, label each row as
100 -- "trial" or "summary".
101 when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
102 putStrLn $
103 printf "%-*s %-14s %-14s %11s %11s %11s %11s %11s"
104 nameColumnWidth "package" "result1" "result2"
105 "mean1" "mean2" "stddev1" "stddev2" "speedup"
107 speedups :: [Double] <- fmap catMaybes $ forM pkgs $ \pkg -> do
108 let printTrial msgType result1 result2 time1 time2 =
109 putStrLn $
110 printf "%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
111 msgType nameColumnWidth (unPackageName pkg)
112 (show result1) (show result2)
113 (diffTimeToDouble time1) (diffTimeToDouble time2)
115 (CabalTrial t1 r1, CabalTrial t2 r2) <- runCabal1 pkg `concurrently'` runCabal2 pkg
117 if not $
118 shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
119 then do
120 when argPrintSkippedPackages $
121 if argPrintTrials
122 then printTrial "trial (skipping)" r1 r2 t1 t2
123 else putStrLn $ printf "%-*s (first run times were too similar)"
124 nameColumnWidth (unPackageName pkg)
125 return Nothing
126 else do
127 when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
128 (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
129 . replicateM (argTrials - 1) $ do
131 (CabalTrial t1' r1', CabalTrial t2' r2') <- runCabal1 pkg `concurrently'` runCabal2 pkg
132 when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
133 return (t1', t2', r1', r2')
135 let result1 = combineTrialResults rs1
136 result2 = combineTrialResults rs2
137 times1 = V.fromList (map diffTimeToDouble ts1)
138 times2 = V.fromList (map diffTimeToDouble ts2)
139 mean1 = mean times1
140 mean2 = mean times2
141 stddev1 = stdDev times1
142 stddev2 = stdDev times2
143 speedup = mean1 / mean2
145 when argPrintTrials $ putStr $ printf "%-16s " "summary"
146 if isSignificantResult result1 result2
147 || isSignificantTimeDifference argPValue ts1 ts2
148 then putStrLn $
149 printf "%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
150 nameColumnWidth (unPackageName pkg)
151 (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
152 else when (argPrintTrials || argPrintSkippedPackages) $
153 putStrLn $
154 printf "%-*s (not significant, speedup = %10.3f)" nameColumnWidth (unPackageName pkg) speedup
156 -- return speedup value
157 return (Just speedup)
159 -- finally, calculate the geometric mean of speedups
160 printf "Geometric mean of %d packages' speedups is %10.3f\n" (length speedups) (geometricMean (V.fromList speedups))
162 where
163 checkArgs :: Args -> IO ()
164 checkArgs Args {..} = do
165 let die msg = hPutStrLn stderr msg >> exitFailure
166 unless (argTrials > 0) $ die "--trials must be greater than 0."
167 unless (argMinRunTimeDifferenceToRerun >= 0) $
168 die "--min-run-time-percentage-difference-to-rerun must be non-negative."
169 unless (isSampleLargeEnough argPValue argTrials) $
170 die "p-value is too small for the number of trials."
172 printConfig :: Args -> IO ()
173 printConfig Args {..} = do
174 putStrLn "Comparing:"
175 putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
176 callProcess argCabal1 ["--version"]
177 putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
178 callProcess argCabal2 ["--version"]
179 -- TODO: Print index state.
180 putStrLn "Base package database:"
181 callProcess "ghc-pkg" ["list"]
183 getPackages :: Args -> IO [PackageName]
184 getPackages Args {..} = do
185 pkgs <-
186 if null argPackages
187 then do
188 putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
189 list <- readProcess argCabal1 ["list", "--simple-output"] ""
190 return $ nub [mkPackageName $ head (words line) | line <- lines list]
191 else do
192 putStrLn "Using given package list ..."
193 return argPackages
194 putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
195 return pkgs
197 data CabalUnderTest = CabalUnderTest1 | CabalUnderTest2
199 runCabal
200 :: Int -- ^ timeout in seconds
201 -> CabalUnderTest -- ^ cabal under test
202 -> FilePath -- ^ cabal
203 -> [String] -- ^ flags
204 -> IO (PackageName -> IO CabalTrial) -- ^ testing function.
205 runCabal timeoutSeconds cabalUnderTest cabal flags = do
206 tmpDir <- getTemporaryDirectory
208 -- cabal directory for this cabal under test
209 let cabalDir = tmpDir </> "solver-benchmarks-workdir" </> case cabalUnderTest of
210 CabalUnderTest1 -> "cabal1"
211 CabalUnderTest2 -> "cabal2"
213 putStrLn $ "Cabal directory (for " ++ cabal ++ ") " ++ cabalDir
214 createDirectoryIfMissing True cabalDir
216 -- shell environment
217 currEnv <- Map.fromList <$> getEnvironment
218 let thisEnv :: [(String, String)]
219 thisEnv = Map.toList $ currEnv
220 & Map.insert "CABAL_CONFIG" (cabalDir </> "config")
221 & Map.insert "CABAL_DIR" cabalDir
223 -- Initialize the config file, whether or not it already exists
224 runCabalCmdWithEnv cabalDir thisEnv ["user-config", "init", "--force"]
226 -- Run cabal update
227 putStrLn $ "Running cabal update (using " ++ cabal ++ ") ..."
228 runCabalCmdWithEnv cabalDir thisEnv ["update"]
230 -- return an actual runner
231 return $ \pkg -> do
232 ((exitCode, err), time) <- timeEvent $ do
234 let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
235 cabalCmd = unwords $
236 [ cabal
238 , "install"
240 -- These flags prevent a Cabal project or package environment from
241 -- affecting the install plan.
243 -- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
244 , "--package-env=non-existent-package-env"
246 -- --lib allows solving for packages with libraries or
247 -- executables.
248 , "--lib"
250 , unPackageName pkg
252 , "--dry-run"
254 -- The test doesn't currently handle stdout, so we suppress it
255 -- with silent. nowrap simplifies parsing the errors messages.
256 , "-vsilent+nowrap"
260 ++ flags
262 cmd = (shell (timeout ++ " " ++ cabalCmd))
263 { std_err = CreatePipe
264 , env = Just thisEnv
265 , cwd = Just cabalDir
268 -- TODO: Read stdout and compare the install plans.
269 (_, _, Just errh, ph) <- createProcess cmd
270 err <- BS.hGetContents errh
271 (, err) <$> waitForProcess ph
273 let exhaustiveMsg =
274 "After searching the rest of the dependency tree exhaustively"
275 result
276 | exitCode == ExitSuccess = Solution
277 | exitCode == ExitFailure 124 = Timeout
278 | fromString exhaustiveMsg `BS.isInfixOf` err = NoInstallPlan
279 | fromString "Backjump limit reached" `BS.isInfixOf` err = BackjumpLimit
280 | fromString "none of the components are available to build" `BS.isInfixOf` err = Unbuildable
281 | fromString "Dependency on unbuildable" `BS.isInfixOf` err = UnbuildableDep
282 | fromString "Dependency cycle between the following components" `BS.isInfixOf` err = ComponentCycle
283 | fromString "Problem with module re-exports" `BS.isInfixOf` err = ModReexpIssue
284 | fromString "There is no package named" `BS.isInfixOf` err = PkgNotFound
285 | otherwise = Unknown
286 return (CabalTrial time result)
287 where
288 runCabalCmdWithEnv cabalDir thisEnv args = do
289 (ec, uout, uerr) <- readCreateProcessWithExitCode (proc cabal args)
290 { cwd = Just cabalDir
291 , env = Just thisEnv
294 unless (ec == ExitSuccess) $ do
295 putStrLn uout
296 putStrLn uerr
297 exitWith ec
299 isSampleLargeEnough :: PValue Double -> Int -> Bool
300 isSampleLargeEnough pvalue trials =
301 -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
302 -- used when both sample sizes are less than or equal to 20.
303 trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
305 isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
306 isSignificantTimeDifference pvalue xs ys =
307 let toVector = V.fromList . map diffTimeToDouble
308 in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
309 Nothing -> error "not enough data for mannWhitneyUtest"
310 Just Significant -> True
311 Just NotSignificant -> False
313 -- Should we stop after the first trial of this package to save time? This
314 -- function skips the package if the results are uninteresting and the times are
315 -- within --min-run-time-percentage-difference-to-rerun.
316 shouldContinueAfterFirstTrial :: Double
317 -> NominalDiffTime
318 -> NominalDiffTime
319 -> CabalResult
320 -> CabalResult
321 -> Bool
322 shouldContinueAfterFirstTrial 0 _ _ _ _ = True
323 shouldContinueAfterFirstTrial _ _ _ Timeout Timeout = False
324 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2 =
325 isSignificantResult r1 r2
326 || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
328 isSignificantResult :: CabalResult -> CabalResult -> Bool
329 isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
331 -- Is this result expected in a benchmark run on all of Hackage?
332 isExpectedResult :: CabalResult -> Bool
333 isExpectedResult Solution = True
334 isExpectedResult NoInstallPlan = True
335 isExpectedResult BackjumpLimit = True
336 isExpectedResult Timeout = True
337 isExpectedResult Unbuildable = True
338 isExpectedResult UnbuildableDep = True
339 isExpectedResult ComponentCycle = True
340 isExpectedResult ModReexpIssue = True
341 isExpectedResult PkgNotFound = False
342 isExpectedResult Unknown = False
344 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
345 -- should be the same. If they aren't the same, we returns Unknown.
346 combineTrialResults :: [CabalResult] -> CabalResult
347 combineTrialResults rs
348 | allEqual rs = head rs
349 | allEqual [r | r <- rs, r /= Timeout] = Timeout
350 | otherwise = Unknown
351 where
352 allEqual :: Eq a => [a] -> Bool
353 allEqual xs = length (nub xs) == 1
355 timeEvent :: IO a -> IO (a, NominalDiffTime)
356 timeEvent task = do
357 start <- getCurrentTime
358 r <- task
359 end <- getCurrentTime
360 return (r, diffUTCTime end start)
362 diffTimeToDouble :: NominalDiffTime -> Double
363 diffTimeToDouble = fromRational . toRational
365 parserInfo :: ParserInfo Args
366 parserInfo = info (argParser <**> helper)
367 ( fullDesc
368 <> progDesc ("Find differences between two cabal commands when solving"
369 ++ " for all packages on Hackage.")
370 <> header "hackage-benchmark" )
372 argParser :: Parser Args
373 argParser = Args
374 <$> strOption
375 ( long "cabal1"
376 <> metavar "PATH"
377 <> help "First cabal executable")
378 <*> strOption
379 ( long "cabal2"
380 <> metavar "PATH"
381 <> help "Second cabal executable")
382 <*> option (words <$> str)
383 ( long "cabal1-flags"
384 <> value []
385 <> metavar "FLAGS"
386 <> help "Extra flags for the first cabal executable")
387 <*> option (words <$> str)
388 ( long "cabal2-flags"
389 <> value []
390 <> metavar "FLAGS"
391 <> help "Extra flags for the second cabal executable")
392 <*> option (map mkPackageName . words <$> str)
393 ( long "packages"
394 <> value []
395 <> metavar "PACKAGES"
396 <> help ("Space separated list of packages to test, or all of Hackage"
397 ++ " if unspecified"))
398 <*> option auto
399 ( long "min-run-time-percentage-difference-to-rerun"
400 <> showDefault
401 <> value 0.0
402 <> metavar "PERCENTAGE"
403 <> help ("Stop testing a package when the difference in run times in"
404 ++ " the first trial are within this percentage, in order to"
405 ++ " save time"))
406 <*> option (mkPValue <$> auto)
407 ( long "pvalue"
408 <> showDefault
409 <> value (mkPValue 0.05)
410 <> metavar "DOUBLE"
411 <> help ("p-value used to determine whether to print the results for"
412 ++ " each package"))
413 <*> option auto
414 ( long "trials"
415 <> showDefault
416 <> value 10
417 <> metavar "N"
418 <> help "Number of trials for each package")
419 <*> switch
420 ( long "concurrently"
421 <> help "Run cabals concurrently")
422 <*> switch
423 ( long "print-trials"
424 <> help "Whether to include the results from individual trials in the output")
425 <*> switch
426 ( long "print-skipped-packages"
427 <> help "Whether to include skipped packages in the output")
428 <*> option auto
429 ( long "timeout"
430 <> showDefault
431 <> value 90
432 <> metavar "SECONDS"
433 <> help "Maximum time to run a cabal command, in seconds")