make LTS branch pre-releases
[cabal.git] / solver-benchmarks / HackageBenchmark.hs
blob37996dbfc63fb48f2dbefc18a5e3d2e9385e8c97
1 {-# LANGUAGE RecordWildCards #-}
2 {-# LANGUAGE TupleSections #-}
3 {-# LANGUAGE ScopedTypeVariables #-}
5 module HackageBenchmark (
6 hackageBenchmarkMain
8 -- Exposed for testing:
9 , CabalResult(..)
10 , isSignificantTimeDifference
11 , combineTrialResults
12 , isSignificantResult
13 , shouldContinueAfterFirstTrial
14 ) where
16 import Control.Concurrent.Async (concurrently)
17 import Control.Monad (forM, replicateM, unless, when)
18 import qualified Data.ByteString as BS
19 import Data.List (nub, unzip4)
20 import Data.Maybe (isJust, catMaybes)
21 import Data.String (fromString)
22 import Data.Function ((&))
23 import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
24 import qualified Data.Vector.Unboxed as V
25 import Options.Applicative
26 import Statistics.Sample (mean, stdDev, geometricMean)
27 import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
28 , mannWhitneyUCriticalValue
29 , mannWhitneyUtest)
30 import Statistics.Types (PValue, mkPValue)
31 import System.Directory (getTemporaryDirectory, createDirectoryIfMissing)
32 import System.Environment (getEnvironment)
33 import System.Exit (ExitCode(..), exitWith, exitFailure)
34 import System.FilePath ((</>))
35 import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
36 , stdout)
37 import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
38 , createProcess, readProcess, shell, waitForProcess, proc, readCreateProcessWithExitCode )
39 import Text.Printf (printf)
41 import qualified Data.Map.Strict as Map
43 import Distribution.Package (PackageName, mkPackageName, unPackageName)
45 data Args = Args {
46 argCabal1 :: FilePath
47 , argCabal2 :: FilePath
48 , argCabal1Flags :: [String]
49 , argCabal2Flags :: [String]
50 , argPackages :: [PackageName]
51 , argMinRunTimeDifferenceToRerun :: Double
52 , argPValue :: PValue Double
53 , argTrials :: Int
54 , argConcurrently :: Bool
55 , argPrintTrials :: Bool
56 , argPrintSkippedPackages :: Bool
57 , argTimeoutSeconds :: Int
60 data CabalTrial = CabalTrial NominalDiffTime CabalResult
62 data CabalResult
63 = Solution
64 | NoInstallPlan
65 | BackjumpLimit
66 | Unbuildable
67 | UnbuildableDep
68 | ComponentCycle
69 | ModReexpIssue
70 | PkgNotFound
71 | Timeout
72 | Unknown
73 deriving (Eq, Show)
75 hackageBenchmarkMain :: IO ()
76 hackageBenchmarkMain = do
77 hSetBuffering stdout LineBuffering
78 args@Args {..} <- execParser parserInfo
79 checkArgs args
80 printConfig args
81 pkgs <- getPackages args
82 putStrLn ""
84 let concurrently' :: IO a -> IO b -> IO (a, b)
85 concurrently' | argConcurrently = concurrently
86 | otherwise = \ma mb -> do { a <- ma; b <- mb; return (a, b) }
88 let -- The maximum length of the heading and package names.
89 nameColumnWidth :: Int
90 nameColumnWidth =
91 maximum $ map length $ "package" : map unPackageName pkgs
93 -- create cabal runners
94 runCabal1 <- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
95 runCabal2 <- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
97 -- When the output contains both trails and summaries, label each row as
98 -- "trial" or "summary".
99 when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
100 putStrLn $
101 printf "%-*s %-14s %-14s %11s %11s %11s %11s %11s"
102 nameColumnWidth "package" "result1" "result2"
103 "mean1" "mean2" "stddev1" "stddev2" "speedup"
105 speedups :: [Double] <- fmap catMaybes $ forM pkgs $ \pkg -> do
106 let printTrial msgType result1 result2 time1 time2 =
107 putStrLn $
108 printf "%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
109 msgType nameColumnWidth (unPackageName pkg)
110 (show result1) (show result2)
111 (diffTimeToDouble time1) (diffTimeToDouble time2)
113 (CabalTrial t1 r1, CabalTrial t2 r2) <- runCabal1 pkg `concurrently'` runCabal2 pkg
115 if not $
116 shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
117 then do
118 when argPrintSkippedPackages $
119 if argPrintTrials
120 then printTrial "trial (skipping)" r1 r2 t1 t2
121 else putStrLn $ printf "%-*s (first run times were too similar)"
122 nameColumnWidth (unPackageName pkg)
123 return Nothing
124 else do
125 when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
126 (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
127 . replicateM (argTrials - 1) $ do
129 (CabalTrial t1' r1', CabalTrial t2' r2') <- runCabal1 pkg `concurrently'` runCabal2 pkg
130 when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
131 return (t1', t2', r1', r2')
133 let result1 = combineTrialResults rs1
134 result2 = combineTrialResults rs2
135 times1 = V.fromList (map diffTimeToDouble ts1)
136 times2 = V.fromList (map diffTimeToDouble ts2)
137 mean1 = mean times1
138 mean2 = mean times2
139 stddev1 = stdDev times1
140 stddev2 = stdDev times2
141 speedup = mean1 / mean2
143 when argPrintTrials $ putStr $ printf "%-16s " "summary"
144 if isSignificantResult result1 result2
145 || isSignificantTimeDifference argPValue ts1 ts2
146 then putStrLn $
147 printf "%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
148 nameColumnWidth (unPackageName pkg)
149 (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
150 else when (argPrintTrials || argPrintSkippedPackages) $
151 putStrLn $
152 printf "%-*s (not significant, speedup = %10.3f)" nameColumnWidth (unPackageName pkg) speedup
154 -- return speedup value
155 return (Just speedup)
157 -- finally, calculate the geometric mean of speedups
158 printf "Geometric mean of %d packages' speedups is %10.3f\n" (length speedups) (geometricMean (V.fromList speedups))
160 where
161 checkArgs :: Args -> IO ()
162 checkArgs Args {..} = do
163 let die msg = hPutStrLn stderr msg >> exitFailure
164 unless (argTrials > 0) $ die "--trials must be greater than 0."
165 unless (argMinRunTimeDifferenceToRerun >= 0) $
166 die "--min-run-time-percentage-difference-to-rerun must be non-negative."
167 unless (isSampleLargeEnough argPValue argTrials) $
168 die "p-value is too small for the number of trials."
170 printConfig :: Args -> IO ()
171 printConfig Args {..} = do
172 putStrLn "Comparing:"
173 putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
174 callProcess argCabal1 ["--version"]
175 putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
176 callProcess argCabal2 ["--version"]
177 -- TODO: Print index state.
178 putStrLn "Base package database:"
179 callProcess "ghc-pkg" ["list"]
181 getPackages :: Args -> IO [PackageName]
182 getPackages Args {..} = do
183 pkgs <-
184 if null argPackages
185 then do
186 putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
187 list <- readProcess argCabal1 ["list", "--simple-output"] ""
188 return $ nub [mkPackageName n | n : _ <- words <$> lines list]
189 else do
190 putStrLn "Using given package list ..."
191 return argPackages
192 putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
193 return pkgs
195 data CabalUnderTest = CabalUnderTest1 | CabalUnderTest2
197 runCabal
198 :: Int -- ^ timeout in seconds
199 -> CabalUnderTest -- ^ cabal under test
200 -> FilePath -- ^ cabal
201 -> [String] -- ^ flags
202 -> IO (PackageName -> IO CabalTrial) -- ^ testing function.
203 runCabal timeoutSeconds cabalUnderTest cabal flags = do
204 tmpDir <- getTemporaryDirectory
206 -- cabal directory for this cabal under test
207 let cabalDir = tmpDir </> "solver-benchmarks-workdir" </> case cabalUnderTest of
208 CabalUnderTest1 -> "cabal1"
209 CabalUnderTest2 -> "cabal2"
211 putStrLn $ "Cabal directory (for " ++ cabal ++ ") " ++ cabalDir
212 createDirectoryIfMissing True cabalDir
214 -- shell environment
215 currEnv <- Map.fromList <$> getEnvironment
216 let thisEnv :: [(String, String)]
217 thisEnv = Map.toList $ currEnv
218 & Map.insert "CABAL_CONFIG" (cabalDir </> "config")
219 & Map.insert "CABAL_DIR" cabalDir
221 -- Initialize the config file, whether or not it already exists
222 runCabalCmdWithEnv cabalDir thisEnv ["user-config", "init", "--force"]
224 -- Run cabal update
225 putStrLn $ "Running cabal update (using " ++ cabal ++ ") ..."
226 runCabalCmdWithEnv cabalDir thisEnv ["update"]
228 -- return an actual runner
229 return $ \pkg -> do
230 ((exitCode, err), time) <- timeEvent $ do
232 let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
233 cabalCmd = unwords $
234 [ cabal
236 , "install"
238 -- These flags prevent a Cabal project or package environment from
239 -- affecting the install plan.
241 -- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
242 , "--package-env=non-existent-package-env"
244 -- --lib allows solving for packages with libraries or
245 -- executables.
246 , "--lib"
248 , unPackageName pkg
250 , "--dry-run"
252 -- The test doesn't currently handle stdout, so we suppress it
253 -- with silent. nowrap simplifies parsing the errors messages.
254 , "-vsilent+nowrap"
258 ++ flags
260 cmd = (shell (timeout ++ " " ++ cabalCmd))
261 { std_err = CreatePipe
262 , env = Just thisEnv
263 , cwd = Just cabalDir
266 -- TODO: Read stdout and compare the install plans.
267 (_, _, Just errh, ph) <- createProcess cmd
268 err <- BS.hGetContents errh
269 (, err) <$> waitForProcess ph
271 let exhaustiveMsg =
272 "After searching the rest of the dependency tree exhaustively"
273 result
274 | exitCode == ExitSuccess = Solution
275 | exitCode == ExitFailure 124 = Timeout
276 | fromString exhaustiveMsg `BS.isInfixOf` err = NoInstallPlan
277 | fromString "Backjump limit reached" `BS.isInfixOf` err = BackjumpLimit
278 | fromString "none of the components are available to build" `BS.isInfixOf` err = Unbuildable
279 | fromString "Dependency on unbuildable" `BS.isInfixOf` err = UnbuildableDep
280 | fromString "Dependency cycle between the following components" `BS.isInfixOf` err = ComponentCycle
281 | fromString "Problem with module re-exports" `BS.isInfixOf` err = ModReexpIssue
282 | fromString "There is no package named" `BS.isInfixOf` err = PkgNotFound
283 | otherwise = Unknown
284 return (CabalTrial time result)
285 where
286 runCabalCmdWithEnv cabalDir thisEnv args = do
287 (ec, uout, uerr) <- readCreateProcessWithExitCode (proc cabal args)
288 { cwd = Just cabalDir
289 , env = Just thisEnv
292 unless (ec == ExitSuccess) $ do
293 putStrLn uout
294 putStrLn uerr
295 exitWith ec
297 isSampleLargeEnough :: PValue Double -> Int -> Bool
298 isSampleLargeEnough pvalue trials =
299 -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
300 -- used when both sample sizes are less than or equal to 20.
301 trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
303 isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
304 isSignificantTimeDifference pvalue xs ys =
305 let toVector = V.fromList . map diffTimeToDouble
306 in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
307 Nothing -> error "not enough data for mannWhitneyUtest"
308 Just Significant -> True
309 Just NotSignificant -> False
311 -- Should we stop after the first trial of this package to save time? This
312 -- function skips the package if the results are uninteresting and the times are
313 -- within --min-run-time-percentage-difference-to-rerun.
314 shouldContinueAfterFirstTrial :: Double
315 -> NominalDiffTime
316 -> NominalDiffTime
317 -> CabalResult
318 -> CabalResult
319 -> Bool
320 shouldContinueAfterFirstTrial 0 _ _ _ _ = True
321 shouldContinueAfterFirstTrial _ _ _ Timeout Timeout = False
322 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2 =
323 isSignificantResult r1 r2
324 || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
326 isSignificantResult :: CabalResult -> CabalResult -> Bool
327 isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
329 -- Is this result expected in a benchmark run on all of Hackage?
330 isExpectedResult :: CabalResult -> Bool
331 isExpectedResult Solution = True
332 isExpectedResult NoInstallPlan = True
333 isExpectedResult BackjumpLimit = True
334 isExpectedResult Timeout = True
335 isExpectedResult Unbuildable = True
336 isExpectedResult UnbuildableDep = True
337 isExpectedResult ComponentCycle = True
338 isExpectedResult ModReexpIssue = True
339 isExpectedResult PkgNotFound = False
340 isExpectedResult Unknown = False
342 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
343 -- should be the same. If they aren't the same, we returns Unknown.
344 combineTrialResults :: [CabalResult] -> CabalResult
345 combineTrialResults rs
346 | r:_ <- rs
347 , allEqual rs = r
348 | allEqual [r | r <- rs, r /= Timeout] = Timeout
349 | otherwise = Unknown
350 where
351 allEqual :: Eq a => [a] -> Bool
352 allEqual xs = length (nub xs) == 1
354 timeEvent :: IO a -> IO (a, NominalDiffTime)
355 timeEvent task = do
356 start <- getCurrentTime
357 r <- task
358 end <- getCurrentTime
359 return (r, diffUTCTime end start)
361 diffTimeToDouble :: NominalDiffTime -> Double
362 diffTimeToDouble = fromRational . toRational
364 parserInfo :: ParserInfo Args
365 parserInfo = info (argParser <**> helper)
366 ( fullDesc
367 <> progDesc ("Find differences between two cabal commands when solving"
368 ++ " for all packages on Hackage.")
369 <> header "hackage-benchmark" )
371 argParser :: Parser Args
372 argParser = Args
373 <$> strOption
374 ( long "cabal1"
375 <> metavar "PATH"
376 <> help "First cabal executable")
377 <*> strOption
378 ( long "cabal2"
379 <> metavar "PATH"
380 <> help "Second cabal executable")
381 <*> option (words <$> str)
382 ( long "cabal1-flags"
383 <> value []
384 <> metavar "FLAGS"
385 <> help "Extra flags for the first cabal executable")
386 <*> option (words <$> str)
387 ( long "cabal2-flags"
388 <> value []
389 <> metavar "FLAGS"
390 <> help "Extra flags for the second cabal executable")
391 <*> option (map mkPackageName . words <$> str)
392 ( long "packages"
393 <> value []
394 <> metavar "PACKAGES"
395 <> help ("Space separated list of packages to test, or all of Hackage"
396 ++ " if unspecified"))
397 <*> option auto
398 ( long "min-run-time-percentage-difference-to-rerun"
399 <> showDefault
400 <> value 0.0
401 <> metavar "PERCENTAGE"
402 <> help ("Stop testing a package when the difference in run times in"
403 ++ " the first trial are within this percentage, in order to"
404 ++ " save time"))
405 <*> option (mkPValue <$> auto)
406 ( long "pvalue"
407 <> showDefault
408 <> value (mkPValue 0.05)
409 <> metavar "DOUBLE"
410 <> help ("p-value used to determine whether to print the results for"
411 ++ " each package"))
412 <*> option auto
413 ( long "trials"
414 <> showDefault
415 <> value 10
416 <> metavar "N"
417 <> help "Number of trials for each package")
418 <*> switch
419 ( long "concurrently"
420 <> help "Run cabals concurrently")
421 <*> switch
422 ( long "print-trials"
423 <> help "Whether to include the results from individual trials in the output")
424 <*> switch
425 ( long "print-skipped-packages"
426 <> help "Whether to include skipped packages in the output")
427 <*> option auto
428 ( long "timeout"
429 <> showDefault
430 <> value 90
431 <> metavar "SECONDS"
432 <> help "Maximum time to run a cabal command, in seconds")