1 {-# OPTIONS_GHC -fno-warn-orphans #-}
2 {-# LANGUAGE RecordWildCards #-}
3 {-# LANGUAGE TupleSections #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
6 module HackageBenchmark
(
9 -- Exposed for testing:
11 , isSignificantTimeDifference
14 , shouldContinueAfterFirstTrial
17 import Control
.Concurrent
.Async
(concurrently
)
18 import Control
.Monad
(forM
, replicateM
, unless, when)
19 import qualified Data
.ByteString
as BS
20 import Data
.List
(nub, unzip4)
21 import Data
.Maybe (isJust, catMaybes)
22 import Data
.Monoid
((<>))
23 import Data
.String (fromString
)
24 import Data
.Function
((&))
25 import Data
.Time
(NominalDiffTime
, diffUTCTime
, getCurrentTime
)
26 import qualified Data
.Vector
.Unboxed
as V
27 import Options
.Applicative
28 import Statistics
.Sample
(mean
, stdDev
, geometricMean
)
29 import Statistics
.Test
.MannWhitneyU
( PositionTest
(..), TestResult
(..)
30 , mannWhitneyUCriticalValue
32 import Statistics
.Types
(PValue
, mkPValue
)
33 import System
.Directory
(getTemporaryDirectory
, createDirectoryIfMissing
)
34 import System
.Environment
(getEnvironment
)
35 import System
.Exit
(ExitCode(..), exitWith, exitFailure)
36 import System
.FilePath ((</>))
37 import System
.IO ( BufferMode(LineBuffering
), hPutStrLn, hSetBuffering, stderr
39 import System
.Process
( StdStream
(CreatePipe
), CreateProcess
(..), callProcess
40 , createProcess
, readProcess
, shell
, waitForProcess
, proc
, readCreateProcessWithExitCode
)
41 import Text
.Printf
(printf
)
43 import qualified Data
.Map
.Strict
as Map
45 import Distribution
.Package
(PackageName
, mkPackageName
, unPackageName
)
49 , argCabal2
:: FilePath
50 , argCabal1Flags
:: [String]
51 , argCabal2Flags
:: [String]
52 , argPackages
:: [PackageName
]
53 , argMinRunTimeDifferenceToRerun
:: Double
54 , argPValue
:: PValue
Double
56 , argConcurrently
:: Bool
57 , argPrintTrials
:: Bool
58 , argPrintSkippedPackages
:: Bool
59 , argTimeoutSeconds
:: Int
62 data CabalTrial
= CabalTrial NominalDiffTime CabalResult
77 hackageBenchmarkMain
:: IO ()
78 hackageBenchmarkMain
= do
79 hSetBuffering stdout LineBuffering
80 args
@Args
{..} <- execParser parserInfo
83 pkgs
<- getPackages args
86 let concurrently
' :: IO a
-> IO b
-> IO (a
, b
)
87 concurrently
' | argConcurrently
= concurrently
88 |
otherwise = \ma mb
-> do { a
<- ma
; b
<- mb
; return (a
, b
) }
90 let -- The maximum length of the heading and package names.
91 nameColumnWidth
:: Int
93 maximum $ map length $ "package" : map unPackageName pkgs
95 -- create cabal runners
96 runCabal1
<- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
97 runCabal2
<- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
99 -- When the output contains both trails and summaries, label each row as
100 -- "trial" or "summary".
101 when argPrintTrials
$ putStr $ printf
"%-16s " "trial/summary"
103 printf
"%-*s %-14s %-14s %11s %11s %11s %11s %11s"
104 nameColumnWidth
"package" "result1" "result2"
105 "mean1" "mean2" "stddev1" "stddev2" "speedup"
107 speedups
:: [Double] <- fmap catMaybes $ forM pkgs
$ \pkg
-> do
108 let printTrial msgType result1 result2 time1 time2
=
110 printf
"%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
111 msgType nameColumnWidth
(unPackageName pkg
)
112 (show result1
) (show result2
)
113 (diffTimeToDouble time1
) (diffTimeToDouble time2
)
115 (CabalTrial t1 r1
, CabalTrial t2 r2
) <- runCabal1 pkg `concurrently
'` runCabal2 pkg
118 shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
120 when argPrintSkippedPackages
$
122 then printTrial
"trial (skipping)" r1 r2 t1 t2
123 else putStrLn $ printf
"%-*s (first run times were too similar)"
124 nameColumnWidth
(unPackageName pkg
)
127 when argPrintTrials
$ printTrial
"trial" r1 r2 t1 t2
128 (ts1
, ts2
, rs1
, rs2
) <- (unzip4 . ((t1
, t2
, r1
, r2
) :) <$>)
129 . replicateM
(argTrials
- 1) $ do
131 (CabalTrial t1
' r1
', CabalTrial t2
' r2
') <- runCabal1 pkg `concurrently
'` runCabal2 pkg
132 when argPrintTrials
$ printTrial
"trial" r1
' r2
' t1
' t2
'
133 return (t1
', t2
', r1
', r2
')
135 let result1
= combineTrialResults rs1
136 result2
= combineTrialResults rs2
137 times1
= V
.fromList
(map diffTimeToDouble ts1
)
138 times2
= V
.fromList
(map diffTimeToDouble ts2
)
141 stddev1
= stdDev times1
142 stddev2
= stdDev times2
143 speedup
= mean1
/ mean2
145 when argPrintTrials
$ putStr $ printf
"%-16s " "summary"
146 if isSignificantResult result1 result2
147 || isSignificantTimeDifference argPValue ts1 ts2
149 printf
"%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
150 nameColumnWidth
(unPackageName pkg
)
151 (show result1
) (show result2
) mean1 mean2 stddev1 stddev2 speedup
152 else when (argPrintTrials || argPrintSkippedPackages
) $
154 printf
"%-*s (not significant, speedup = %10.3f)" nameColumnWidth
(unPackageName pkg
) speedup
156 -- return speedup value
157 return (Just speedup
)
159 -- finally, calculate the geometric mean of speedups
160 printf
"Geometric mean of %d packages' speedups is %10.3f\n" (length speedups
) (geometricMean
(V
.fromList speedups
))
163 checkArgs
:: Args
-> IO ()
164 checkArgs Args
{..} = do
165 let die msg
= hPutStrLn stderr msg
>> exitFailure
166 unless (argTrials
> 0) $ die
"--trials must be greater than 0."
167 unless (argMinRunTimeDifferenceToRerun
>= 0) $
168 die
"--min-run-time-percentage-difference-to-rerun must be non-negative."
169 unless (isSampleLargeEnough argPValue argTrials
) $
170 die
"p-value is too small for the number of trials."
172 printConfig
:: Args
-> IO ()
173 printConfig Args
{..} = do
174 putStrLn "Comparing:"
175 putStrLn $ "1: " ++ argCabal1
++ " " ++ unwords argCabal1Flags
176 callProcess argCabal1
["--version"]
177 putStrLn $ "2: " ++ argCabal2
++ " " ++ unwords argCabal2Flags
178 callProcess argCabal2
["--version"]
179 -- TODO: Print index state.
180 putStrLn "Base package database:"
181 callProcess
"ghc-pkg" ["list"]
183 getPackages
:: Args
-> IO [PackageName
]
184 getPackages Args
{..} = do
188 putStrLn $ "Obtaining the package list (using " ++ argCabal1
++ ") ..."
189 list <- readProcess argCabal1
["list", "--simple-output"] ""
190 return $ nub [mkPackageName
$ head (words line
) | line
<- lines list]
192 putStrLn "Using given package list ..."
194 putStrLn $ "Done, got " ++ show (length pkgs
) ++ " packages."
197 data CabalUnderTest
= CabalUnderTest1 | CabalUnderTest2
200 :: Int -- ^ timeout in seconds
201 -> CabalUnderTest
-- ^ cabal under test
202 -> FilePath -- ^ cabal
203 -> [String] -- ^ flags
204 -> IO (PackageName
-> IO CabalTrial
) -- ^ testing function.
205 runCabal timeoutSeconds cabalUnderTest cabal flags
= do
206 tmpDir
<- getTemporaryDirectory
208 -- cabal directory for this cabal under test
209 let cabalDir
= tmpDir
</> "solver-benchmarks-workdir" </> case cabalUnderTest
of
210 CabalUnderTest1
-> "cabal1"
211 CabalUnderTest2
-> "cabal2"
213 putStrLn $ "Cabal directory (for " ++ cabal
++ ") " ++ cabalDir
214 createDirectoryIfMissing
True cabalDir
217 currEnv
<- Map
.fromList
<$> getEnvironment
218 let thisEnv
:: [(String, String)]
219 thisEnv
= Map
.toList
$ currEnv
220 & Map
.insert "CABAL_CONFIG" (cabalDir
</> "config")
221 & Map
.insert "CABAL_DIR" cabalDir
223 -- Initialize the config file, whether or not it already exists
224 runCabalCmdWithEnv cabalDir thisEnv
["user-config", "init", "--force"]
227 putStrLn $ "Running cabal update (using " ++ cabal
++ ") ..."
228 runCabalCmdWithEnv cabalDir thisEnv
["update"]
230 -- return an actual runner
232 ((exitCode
, err
), time
) <- timeEvent
$ do
234 let timeout
= "timeout --foreground -sINT " ++ show timeoutSeconds
240 -- These flags prevent a Cabal project or package environment from
241 -- affecting the install plan.
243 -- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
244 , "--package-env=non-existent-package-env"
246 -- --lib allows solving for packages with libraries or
254 -- The test doesn't currently handle stdout, so we suppress it
255 -- with silent. nowrap simplifies parsing the errors messages.
262 cmd
= (shell
(timeout
++ " " ++ cabalCmd
))
263 { std_err
= CreatePipe
265 , cwd
= Just cabalDir
268 -- TODO: Read stdout and compare the install plans.
269 (_
, _
, Just errh
, ph
) <- createProcess cmd
270 err
<- BS
.hGetContents errh
271 (, err
) <$> waitForProcess ph
274 "After searching the rest of the dependency tree exhaustively"
276 | exitCode
== ExitSuccess
= Solution
277 | exitCode
== ExitFailure
124 = Timeout
278 | fromString exhaustiveMsg `BS
.isInfixOf` err
= NoInstallPlan
279 | fromString
"Backjump limit reached" `BS
.isInfixOf` err
= BackjumpLimit
280 | fromString
"none of the components are available to build" `BS
.isInfixOf` err
= Unbuildable
281 | fromString
"Dependency on unbuildable" `BS
.isInfixOf` err
= UnbuildableDep
282 | fromString
"Dependency cycle between the following components" `BS
.isInfixOf` err
= ComponentCycle
283 | fromString
"Problem with module re-exports" `BS
.isInfixOf` err
= ModReexpIssue
284 | fromString
"There is no package named" `BS
.isInfixOf` err
= PkgNotFound
285 |
otherwise = Unknown
286 return (CabalTrial time result
)
288 runCabalCmdWithEnv cabalDir thisEnv args
= do
289 (ec
, uout
, uerr
) <- readCreateProcessWithExitCode
(proc cabal args
)
290 { cwd
= Just cabalDir
294 unless (ec
== ExitSuccess
) $ do
299 isSampleLargeEnough
:: PValue
Double -> Int -> Bool
300 isSampleLargeEnough pvalue trials
=
301 -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
302 -- used when both sample sizes are less than or equal to 20.
303 trials
> 20 ||
isJust (mannWhitneyUCriticalValue
(trials
, trials
) pvalue
)
305 isSignificantTimeDifference
:: PValue
Double -> [NominalDiffTime
] -> [NominalDiffTime
] -> Bool
306 isSignificantTimeDifference pvalue xs ys
=
307 let toVector
= V
.fromList
. map diffTimeToDouble
308 in case mannWhitneyUtest SamplesDiffer pvalue
(toVector xs
) (toVector ys
) of
309 Nothing
-> error "not enough data for mannWhitneyUtest"
310 Just Significant
-> True
311 Just NotSignificant
-> False
313 -- Should we stop after the first trial of this package to save time? This
314 -- function skips the package if the results are uninteresting and the times are
315 -- within --min-run-time-percentage-difference-to-rerun.
316 shouldContinueAfterFirstTrial
:: Double
322 shouldContinueAfterFirstTrial
0 _ _ _ _
= True
323 shouldContinueAfterFirstTrial _ _ _ Timeout Timeout
= False
324 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2
=
325 isSignificantResult r1 r2
326 ||
abs (t1
- t2
) / min t1 t2
>= realToFrac (maxRunTimeDifferenceToIgnore
/ 100)
328 isSignificantResult
:: CabalResult
-> CabalResult
-> Bool
329 isSignificantResult r1 r2
= r1
/= r2 ||
not (isExpectedResult r1
)
331 -- Is this result expected in a benchmark run on all of Hackage?
332 isExpectedResult
:: CabalResult
-> Bool
333 isExpectedResult Solution
= True
334 isExpectedResult NoInstallPlan
= True
335 isExpectedResult BackjumpLimit
= True
336 isExpectedResult Timeout
= True
337 isExpectedResult Unbuildable
= True
338 isExpectedResult UnbuildableDep
= True
339 isExpectedResult ComponentCycle
= True
340 isExpectedResult ModReexpIssue
= True
341 isExpectedResult PkgNotFound
= False
342 isExpectedResult Unknown
= False
344 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
345 -- should be the same. If they aren't the same, we returns Unknown.
346 combineTrialResults
:: [CabalResult
] -> CabalResult
347 combineTrialResults rs
348 | allEqual rs
= head rs
349 | allEqual
[r | r
<- rs
, r
/= Timeout
] = Timeout
350 |
otherwise = Unknown
352 allEqual
:: Eq a
=> [a
] -> Bool
353 allEqual xs
= length (nub xs
) == 1
355 timeEvent
:: IO a
-> IO (a
, NominalDiffTime
)
357 start
<- getCurrentTime
359 end
<- getCurrentTime
360 return (r
, diffUTCTime end start
)
362 diffTimeToDouble
:: NominalDiffTime
-> Double
363 diffTimeToDouble
= fromRational . toRational
365 parserInfo
:: ParserInfo Args
366 parserInfo
= info
(argParser
<**> helper
)
368 <> progDesc
("Find differences between two cabal commands when solving"
369 ++ " for all packages on Hackage.")
370 <> header
"hackage-benchmark" )
372 argParser
:: Parser Args
377 <> help
"First cabal executable")
381 <> help
"Second cabal executable")
382 <*> option
(words <$> str
)
383 ( long
"cabal1-flags"
386 <> help
"Extra flags for the first cabal executable")
387 <*> option
(words <$> str
)
388 ( long
"cabal2-flags"
391 <> help
"Extra flags for the second cabal executable")
392 <*> option
(map mkPackageName
. words <$> str
)
395 <> metavar
"PACKAGES"
396 <> help
("Space separated list of packages to test, or all of Hackage"
397 ++ " if unspecified"))
399 ( long
"min-run-time-percentage-difference-to-rerun"
402 <> metavar
"PERCENTAGE"
403 <> help
("Stop testing a package when the difference in run times in"
404 ++ " the first trial are within this percentage, in order to"
406 <*> option
(mkPValue
<$> auto
)
409 <> value (mkPValue
0.05)
411 <> help
("p-value used to determine whether to print the results for"
418 <> help
"Number of trials for each package")
420 ( long
"concurrently"
421 <> help
"Run cabals concurrently")
423 ( long
"print-trials"
424 <> help
"Whether to include the results from individual trials in the output")
426 ( long
"print-skipped-packages"
427 <> help
"Whether to include skipped packages in the output")
433 <> help
"Maximum time to run a cabal command, in seconds")