1 {-# OPTIONS_GHC -fno-warn-orphans #-}
2 {-# LANGUAGE RecordWildCards #-}
3 {-# LANGUAGE TupleSections #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
6 module HackageBenchmark
(
9 -- Exposed for testing:
11 , isSignificantTimeDifference
14 , shouldContinueAfterFirstTrial
17 import Control
.Concurrent
.Async
(concurrently
)
18 import Control
.Monad
(forM
, replicateM
, unless, when)
19 import qualified Data
.ByteString
as BS
20 import Data
.List
(nub, unzip4)
21 import Data
.Maybe (isJust, catMaybes)
22 import Data
.Monoid
((<>))
23 import Data
.String (fromString
)
24 import Data
.Function
((&))
25 import Data
.Time
(NominalDiffTime
, diffUTCTime
, getCurrentTime
)
26 import qualified Data
.Vector
.Unboxed
as V
27 import Options
.Applicative
28 import Statistics
.Sample
(mean
, stdDev
, geometricMean
)
29 import Statistics
.Test
.MannWhitneyU
( PositionTest
(..), TestResult
(..)
30 , mannWhitneyUCriticalValue
32 import Statistics
.Types
(PValue
, mkPValue
)
33 import System
.Directory
(getTemporaryDirectory
, createDirectoryIfMissing
)
34 import System
.Environment
(getEnvironment
)
35 import System
.Exit
(ExitCode(..), exitWith, exitFailure)
36 import System
.FilePath ((</>))
37 import System
.IO ( BufferMode(LineBuffering
), hPutStrLn, hSetBuffering, stderr
39 import System
.Process
( StdStream
(CreatePipe
), CreateProcess
(..), callProcess
40 , createProcess
, readProcess
, shell
, waitForProcess
, proc
, readCreateProcessWithExitCode
)
41 import Text
.Printf
(printf
)
43 import qualified Data
.Map
.Strict
as Map
45 import Distribution
.Package
(PackageName
, mkPackageName
, unPackageName
)
49 , argCabal2
:: FilePath
50 , argCabal1Flags
:: [String]
51 , argCabal2Flags
:: [String]
52 , argPackages
:: [PackageName
]
53 , argMinRunTimeDifferenceToRerun
:: Double
54 , argPValue
:: PValue
Double
56 , argConcurrently
:: Bool
57 , argPrintTrials
:: Bool
58 , argPrintSkippedPackages
:: Bool
59 , argTimeoutSeconds
:: Int
62 data CabalTrial
= CabalTrial NominalDiffTime CabalResult
77 hackageBenchmarkMain
:: IO ()
78 hackageBenchmarkMain
= do
79 hSetBuffering stdout LineBuffering
80 args
@Args
{..} <- execParser parserInfo
83 pkgs
<- getPackages args
86 let concurrently
' :: IO a
-> IO b
-> IO (a
, b
)
87 concurrently
' | argConcurrently
= concurrently
88 |
otherwise = \ma mb
-> do { a
<- ma
; b
<- mb
; return (a
, b
) }
90 let -- The maximum length of the heading and package names.
91 nameColumnWidth
:: Int
93 maximum $ map length $ "package" : map unPackageName pkgs
95 -- create cabal runners
96 runCabal1
<- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
97 runCabal2
<- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
99 -- When the output contains both trails and summaries, label each row as
100 -- "trial" or "summary".
101 when argPrintTrials
$ putStr $ printf
"%-16s " "trial/summary"
103 printf
"%-*s %-14s %-14s %11s %11s %11s %11s %11s"
104 nameColumnWidth
"package" "result1" "result2"
105 "mean1" "mean2" "stddev1" "stddev2" "speedup"
107 speedups
:: [Double] <- fmap catMaybes $ forM pkgs
$ \pkg
-> do
108 let printTrial msgType result1 result2 time1 time2
=
110 printf
"%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
111 msgType nameColumnWidth
(unPackageName pkg
)
112 (show result1
) (show result2
)
113 (diffTimeToDouble time1
) (diffTimeToDouble time2
)
115 (CabalTrial t1 r1
, CabalTrial t2 r2
) <- runCabal1 pkg `concurrently
'` runCabal2 pkg
118 shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
120 when argPrintSkippedPackages
$
122 then printTrial
"trial (skipping)" r1 r2 t1 t2
123 else putStrLn $ printf
"%-*s (first run times were too similar)"
124 nameColumnWidth
(unPackageName pkg
)
127 when argPrintTrials
$ printTrial
"trial" r1 r2 t1 t2
128 (ts1
, ts2
, rs1
, rs2
) <- (unzip4 . ((t1
, t2
, r1
, r2
) :) <$>)
129 . replicateM
(argTrials
- 1) $ do
131 (CabalTrial t1
' r1
', CabalTrial t2
' r2
') <- runCabal1 pkg `concurrently
'` runCabal2 pkg
132 when argPrintTrials
$ printTrial
"trial" r1
' r2
' t1
' t2
'
133 return (t1
', t2
', r1
', r2
')
135 let result1
= combineTrialResults rs1
136 result2
= combineTrialResults rs2
137 times1
= V
.fromList
(map diffTimeToDouble ts1
)
138 times2
= V
.fromList
(map diffTimeToDouble ts2
)
141 stddev1
= stdDev times1
142 stddev2
= stdDev times2
143 speedup
= mean1
/ mean2
145 when argPrintTrials
$ putStr $ printf
"%-16s " "summary"
146 if isSignificantResult result1 result2
147 || isSignificantTimeDifference argPValue ts1 ts2
149 printf
"%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
150 nameColumnWidth
(unPackageName pkg
)
151 (show result1
) (show result2
) mean1 mean2 stddev1 stddev2 speedup
152 else when (argPrintTrials || argPrintSkippedPackages
) $
154 printf
"%-*s (not significant, speedup = %10.3f)" nameColumnWidth
(unPackageName pkg
) speedup
156 -- return speedup value
157 return (Just speedup
)
159 -- finally, calculate the geometric mean of speedups
160 printf
"Geometric mean of %d packages' speedups is %10.3f\n" (length speedups
) (geometricMean
(V
.fromList speedups
))
163 checkArgs
:: Args
-> IO ()
164 checkArgs Args
{..} = do
165 let die msg
= hPutStrLn stderr msg
>> exitFailure
166 unless (argTrials
> 0) $ die
"--trials must be greater than 0."
167 unless (argMinRunTimeDifferenceToRerun
>= 0) $
168 die
"--min-run-time-percentage-difference-to-rerun must be non-negative."
169 unless (isSampleLargeEnough argPValue argTrials
) $
170 die
"p-value is too small for the number of trials."
172 printConfig
:: Args
-> IO ()
173 printConfig Args
{..} = do
174 putStrLn "Comparing:"
175 putStrLn $ "1: " ++ argCabal1
++ " " ++ unwords argCabal1Flags
176 callProcess argCabal1
["--version"]
177 putStrLn $ "2: " ++ argCabal2
++ " " ++ unwords argCabal2Flags
178 callProcess argCabal2
["--version"]
179 -- TODO: Print index state.
180 putStrLn "Base package database:"
181 callProcess
"ghc-pkg" ["list"]
183 getPackages
:: Args
-> IO [PackageName
]
184 getPackages Args
{..} = do
188 putStrLn $ "Obtaining the package list (using " ++ argCabal1
++ ") ..."
189 list <- readProcess argCabal1
["list", "--simple-output"] ""
190 return $ nub [mkPackageName
$ head (words line
) | line
<- lines list]
192 putStrLn "Using given package list ..."
194 putStrLn $ "Done, got " ++ show (length pkgs
) ++ " packages."
197 data CabalUnderTest
= CabalUnderTest1 | CabalUnderTest2
200 :: Int -- ^ timeout in seconds
201 -> CabalUnderTest
-- ^ cabal under test
202 -> FilePath -- ^ cabal
203 -> [String] -- ^ flags
204 -> IO (PackageName
-> IO CabalTrial
) -- ^ testing function.
205 runCabal timeoutSeconds cabalUnderTest cabal flags
= do
206 tmpDir
<- getTemporaryDirectory
208 -- cabal directory for this cabal under test
209 let cabalDir
= tmpDir
</> "solver-benchmarks-workdir" </> case cabalUnderTest
of
210 CabalUnderTest1
-> "cabal1"
211 CabalUnderTest2
-> "cabal2"
213 putStrLn $ "Cabal directory (for " ++ cabal
++ ") " ++ cabalDir
214 createDirectoryIfMissing
True cabalDir
217 currEnv
<- Map
.fromList
<$> getEnvironment
218 let thisEnv
:: [(String, String)]
219 thisEnv
= Map
.toList
$ currEnv
220 & Map
.insert "CABAL_CONFIG" (cabalDir
</> "config")
221 & Map
.insert "CABAL_DIR" cabalDir
224 putStrLn $ "Running cabal update (using " ++ cabal
++ ") ..."
225 (ec
, uout
, uerr
) <- readCreateProcessWithExitCode
(proc cabal
["update"])
226 { cwd
= Just cabalDir
230 unless (ec
== ExitSuccess
) $ do
235 -- return an actual runner
237 ((exitCode
, err
), time
) <- timeEvent
$ do
239 let timeout
= "timeout --foreground -sINT " ++ show timeoutSeconds
245 -- These flags prevent a Cabal project or package environment from
246 -- affecting the install plan.
248 -- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
249 , "--package-env=non-existent-package-env"
251 -- --lib allows solving for packages with libraries or
259 -- The test doesn't currently handle stdout, so we suppress it
260 -- with silent. nowrap simplifies parsing the errors messages.
267 cmd
= (shell
(timeout
++ " " ++ cabalCmd
))
268 { std_err
= CreatePipe
270 , cwd
= Just cabalDir
273 -- TODO: Read stdout and compare the install plans.
274 (_
, _
, Just errh
, ph
) <- createProcess cmd
275 err
<- BS
.hGetContents errh
276 (, err
) <$> waitForProcess ph
279 "After searching the rest of the dependency tree exhaustively"
281 | exitCode
== ExitSuccess
= Solution
282 | exitCode
== ExitFailure
124 = Timeout
283 | fromString exhaustiveMsg `BS
.isInfixOf` err
= NoInstallPlan
284 | fromString
"Backjump limit reached" `BS
.isInfixOf` err
= BackjumpLimit
285 | fromString
"none of the components are available to build" `BS
.isInfixOf` err
= Unbuildable
286 | fromString
"Dependency on unbuildable" `BS
.isInfixOf` err
= UnbuildableDep
287 | fromString
"Dependency cycle between the following components" `BS
.isInfixOf` err
= ComponentCycle
288 | fromString
"Problem with module re-exports" `BS
.isInfixOf` err
= ModReexpIssue
289 | fromString
"There is no package named" `BS
.isInfixOf` err
= PkgNotFound
290 |
otherwise = Unknown
291 return (CabalTrial time result
)
293 isSampleLargeEnough
:: PValue
Double -> Int -> Bool
294 isSampleLargeEnough pvalue trials
=
295 -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
296 -- used when both sample sizes are less than or equal to 20.
297 trials
> 20 ||
isJust (mannWhitneyUCriticalValue
(trials
, trials
) pvalue
)
299 isSignificantTimeDifference
:: PValue
Double -> [NominalDiffTime
] -> [NominalDiffTime
] -> Bool
300 isSignificantTimeDifference pvalue xs ys
=
301 let toVector
= V
.fromList
. map diffTimeToDouble
302 in case mannWhitneyUtest SamplesDiffer pvalue
(toVector xs
) (toVector ys
) of
303 Nothing
-> error "not enough data for mannWhitneyUtest"
304 Just Significant
-> True
305 Just NotSignificant
-> False
307 -- Should we stop after the first trial of this package to save time? This
308 -- function skips the package if the results are uninteresting and the times are
309 -- within --min-run-time-percentage-difference-to-rerun.
310 shouldContinueAfterFirstTrial
:: Double
316 shouldContinueAfterFirstTrial
0 _ _ _ _
= True
317 shouldContinueAfterFirstTrial _ _ _ Timeout Timeout
= False
318 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2
=
319 isSignificantResult r1 r2
320 ||
abs (t1
- t2
) / min t1 t2
>= realToFrac (maxRunTimeDifferenceToIgnore
/ 100)
322 isSignificantResult
:: CabalResult
-> CabalResult
-> Bool
323 isSignificantResult r1 r2
= r1
/= r2 ||
not (isExpectedResult r1
)
325 -- Is this result expected in a benchmark run on all of Hackage?
326 isExpectedResult
:: CabalResult
-> Bool
327 isExpectedResult Solution
= True
328 isExpectedResult NoInstallPlan
= True
329 isExpectedResult BackjumpLimit
= True
330 isExpectedResult Timeout
= True
331 isExpectedResult Unbuildable
= True
332 isExpectedResult UnbuildableDep
= True
333 isExpectedResult ComponentCycle
= True
334 isExpectedResult ModReexpIssue
= True
335 isExpectedResult PkgNotFound
= False
336 isExpectedResult Unknown
= False
338 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
339 -- should be the same. If they aren't the same, we returns Unknown.
340 combineTrialResults
:: [CabalResult
] -> CabalResult
341 combineTrialResults rs
342 | allEqual rs
= head rs
343 | allEqual
[r | r
<- rs
, r
/= Timeout
] = Timeout
344 |
otherwise = Unknown
346 allEqual
:: Eq a
=> [a
] -> Bool
347 allEqual xs
= length (nub xs
) == 1
349 timeEvent
:: IO a
-> IO (a
, NominalDiffTime
)
351 start
<- getCurrentTime
353 end
<- getCurrentTime
354 return (r
, diffUTCTime end start
)
356 diffTimeToDouble
:: NominalDiffTime
-> Double
357 diffTimeToDouble
= fromRational . toRational
359 parserInfo
:: ParserInfo Args
360 parserInfo
= info
(argParser
<**> helper
)
362 <> progDesc
("Find differences between two cabal commands when solving"
363 ++ " for all packages on Hackage.")
364 <> header
"hackage-benchmark" )
366 argParser
:: Parser Args
371 <> help
"First cabal executable")
375 <> help
"Second cabal executable")
376 <*> option
(words <$> str
)
377 ( long
"cabal1-flags"
380 <> help
"Extra flags for the first cabal executable")
381 <*> option
(words <$> str
)
382 ( long
"cabal2-flags"
385 <> help
"Extra flags for the second cabal executable")
386 <*> option
(map mkPackageName
. words <$> str
)
389 <> metavar
"PACKAGES"
390 <> help
("Space separated list of packages to test, or all of Hackage"
391 ++ " if unspecified"))
393 ( long
"min-run-time-percentage-difference-to-rerun"
396 <> metavar
"PERCENTAGE"
397 <> help
("Stop testing a package when the difference in run times in"
398 ++ " the first trial are within this percentage, in order to"
400 <*> option
(mkPValue
<$> auto
)
403 <> value (mkPValue
0.05)
405 <> help
("p-value used to determine whether to print the results for"
412 <> help
"Number of trials for each package")
414 ( long
"concurrently"
415 <> help
"Run cabals concurrently")
417 ( long
"print-trials"
418 <> help
"Whether to include the results from individual trials in the output")
420 ( long
"print-skipped-packages"
421 <> help
"Whether to include skipped packages in the output")
427 <> help
"Maximum time to run a cabal command, in seconds")