1 {-# LANGUAGE RecordWildCards #-}
2 {-# LANGUAGE TupleSections #-}
3 {-# LANGUAGE ScopedTypeVariables #-}
5 module HackageBenchmark
(
8 -- Exposed for testing:
10 , isSignificantTimeDifference
13 , shouldContinueAfterFirstTrial
16 import Control
.Concurrent
.Async
(concurrently
)
17 import Control
.Monad
(forM
, replicateM
, unless, when)
18 import qualified Data
.ByteString
as BS
19 import Data
.List
(nub, unzip4)
20 import Data
.Maybe (isJust, catMaybes)
21 import Data
.String (fromString
)
22 import Data
.Function
((&))
23 import Data
.Time
(NominalDiffTime
, diffUTCTime
, getCurrentTime
)
24 import qualified Data
.Vector
.Unboxed
as V
25 import Options
.Applicative
26 import Statistics
.Sample
(mean
, stdDev
, geometricMean
)
27 import Statistics
.Test
.MannWhitneyU
( PositionTest
(..), TestResult
(..)
28 , mannWhitneyUCriticalValue
30 import Statistics
.Types
(PValue
, mkPValue
)
31 import System
.Directory
(getTemporaryDirectory
, createDirectoryIfMissing
)
32 import System
.Environment
(getEnvironment
)
33 import System
.Exit
(ExitCode(..), exitWith, exitFailure)
34 import System
.FilePath ((</>))
35 import System
.IO ( BufferMode(LineBuffering
), hPutStrLn, hSetBuffering, stderr
37 import System
.Process
( StdStream
(CreatePipe
), CreateProcess
(..), callProcess
38 , createProcess
, readProcess
, shell
, waitForProcess
, proc
, readCreateProcessWithExitCode
)
39 import Text
.Printf
(printf
)
41 import qualified Data
.Map
.Strict
as Map
43 import Distribution
.Package
(PackageName
, mkPackageName
, unPackageName
)
47 , argCabal2
:: FilePath
48 , argCabal1Flags
:: [String]
49 , argCabal2Flags
:: [String]
50 , argPackages
:: [PackageName
]
51 , argMinRunTimeDifferenceToRerun
:: Double
52 , argPValue
:: PValue
Double
54 , argConcurrently
:: Bool
55 , argPrintTrials
:: Bool
56 , argPrintSkippedPackages
:: Bool
57 , argTimeoutSeconds
:: Int
60 data CabalTrial
= CabalTrial NominalDiffTime CabalResult
75 hackageBenchmarkMain
:: IO ()
76 hackageBenchmarkMain
= do
77 hSetBuffering stdout LineBuffering
78 args
@Args
{..} <- execParser parserInfo
81 pkgs
<- getPackages args
84 let concurrently
' :: IO a
-> IO b
-> IO (a
, b
)
85 concurrently
' | argConcurrently
= concurrently
86 |
otherwise = \ma mb
-> do { a
<- ma
; b
<- mb
; return (a
, b
) }
88 let -- The maximum length of the heading and package names.
89 nameColumnWidth
:: Int
91 maximum $ map length $ "package" : map unPackageName pkgs
93 -- create cabal runners
94 runCabal1
<- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
95 runCabal2
<- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
97 -- When the output contains both trails and summaries, label each row as
98 -- "trial" or "summary".
99 when argPrintTrials
$ putStr $ printf
"%-16s " "trial/summary"
101 printf
"%-*s %-14s %-14s %11s %11s %11s %11s %11s"
102 nameColumnWidth
"package" "result1" "result2"
103 "mean1" "mean2" "stddev1" "stddev2" "speedup"
105 speedups
:: [Double] <- fmap catMaybes $ forM pkgs
$ \pkg
-> do
106 let printTrial msgType result1 result2 time1 time2
=
108 printf
"%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
109 msgType nameColumnWidth
(unPackageName pkg
)
110 (show result1
) (show result2
)
111 (diffTimeToDouble time1
) (diffTimeToDouble time2
)
113 (CabalTrial t1 r1
, CabalTrial t2 r2
) <- runCabal1 pkg `concurrently
'` runCabal2 pkg
116 shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
118 when argPrintSkippedPackages
$
120 then printTrial
"trial (skipping)" r1 r2 t1 t2
121 else putStrLn $ printf
"%-*s (first run times were too similar)"
122 nameColumnWidth
(unPackageName pkg
)
125 when argPrintTrials
$ printTrial
"trial" r1 r2 t1 t2
126 (ts1
, ts2
, rs1
, rs2
) <- (unzip4 . ((t1
, t2
, r1
, r2
) :) <$>)
127 . replicateM
(argTrials
- 1) $ do
129 (CabalTrial t1
' r1
', CabalTrial t2
' r2
') <- runCabal1 pkg `concurrently
'` runCabal2 pkg
130 when argPrintTrials
$ printTrial
"trial" r1
' r2
' t1
' t2
'
131 return (t1
', t2
', r1
', r2
')
133 let result1
= combineTrialResults rs1
134 result2
= combineTrialResults rs2
135 times1
= V
.fromList
(map diffTimeToDouble ts1
)
136 times2
= V
.fromList
(map diffTimeToDouble ts2
)
139 stddev1
= stdDev times1
140 stddev2
= stdDev times2
141 speedup
= mean1
/ mean2
143 when argPrintTrials
$ putStr $ printf
"%-16s " "summary"
144 if isSignificantResult result1 result2
145 || isSignificantTimeDifference argPValue ts1 ts2
147 printf
"%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
148 nameColumnWidth
(unPackageName pkg
)
149 (show result1
) (show result2
) mean1 mean2 stddev1 stddev2 speedup
150 else when (argPrintTrials || argPrintSkippedPackages
) $
152 printf
"%-*s (not significant, speedup = %10.3f)" nameColumnWidth
(unPackageName pkg
) speedup
154 -- return speedup value
155 return (Just speedup
)
157 -- finally, calculate the geometric mean of speedups
158 printf
"Geometric mean of %d packages' speedups is %10.3f\n" (length speedups
) (geometricMean
(V
.fromList speedups
))
161 checkArgs
:: Args
-> IO ()
162 checkArgs Args
{..} = do
163 let die msg
= hPutStrLn stderr msg
>> exitFailure
164 unless (argTrials
> 0) $ die
"--trials must be greater than 0."
165 unless (argMinRunTimeDifferenceToRerun
>= 0) $
166 die
"--min-run-time-percentage-difference-to-rerun must be non-negative."
167 unless (isSampleLargeEnough argPValue argTrials
) $
168 die
"p-value is too small for the number of trials."
170 printConfig
:: Args
-> IO ()
171 printConfig Args
{..} = do
172 putStrLn "Comparing:"
173 putStrLn $ "1: " ++ argCabal1
++ " " ++ unwords argCabal1Flags
174 callProcess argCabal1
["--version"]
175 putStrLn $ "2: " ++ argCabal2
++ " " ++ unwords argCabal2Flags
176 callProcess argCabal2
["--version"]
177 -- TODO: Print index state.
178 putStrLn "Base package database:"
179 callProcess
"ghc-pkg" ["list"]
181 getPackages
:: Args
-> IO [PackageName
]
182 getPackages Args
{..} = do
186 putStrLn $ "Obtaining the package list (using " ++ argCabal1
++ ") ..."
187 list <- readProcess argCabal1
["list", "--simple-output"] ""
188 return $ nub [mkPackageName n | n
: _
<- words <$> lines list]
190 putStrLn "Using given package list ..."
192 putStrLn $ "Done, got " ++ show (length pkgs
) ++ " packages."
195 data CabalUnderTest
= CabalUnderTest1 | CabalUnderTest2
198 :: Int -- ^ timeout in seconds
199 -> CabalUnderTest
-- ^ cabal under test
200 -> FilePath -- ^ cabal
201 -> [String] -- ^ flags
202 -> IO (PackageName
-> IO CabalTrial
) -- ^ testing function.
203 runCabal timeoutSeconds cabalUnderTest cabal flags
= do
204 tmpDir
<- getTemporaryDirectory
206 -- cabal directory for this cabal under test
207 let cabalDir
= tmpDir
</> "solver-benchmarks-workdir" </> case cabalUnderTest
of
208 CabalUnderTest1
-> "cabal1"
209 CabalUnderTest2
-> "cabal2"
211 putStrLn $ "Cabal directory (for " ++ cabal
++ ") " ++ cabalDir
212 createDirectoryIfMissing
True cabalDir
215 currEnv
<- Map
.fromList
<$> getEnvironment
216 let thisEnv
:: [(String, String)]
217 thisEnv
= Map
.toList
$ currEnv
218 & Map
.insert "CABAL_CONFIG" (cabalDir
</> "config")
219 & Map
.insert "CABAL_DIR" cabalDir
221 -- Initialize the config file, whether or not it already exists
222 runCabalCmdWithEnv cabalDir thisEnv
["user-config", "init", "--force"]
225 putStrLn $ "Running cabal update (using " ++ cabal
++ ") ..."
226 runCabalCmdWithEnv cabalDir thisEnv
["update"]
228 -- return an actual runner
230 ((exitCode
, err
), time
) <- timeEvent
$ do
232 let timeout
= "timeout --foreground -sINT " ++ show timeoutSeconds
238 -- These flags prevent a Cabal project or package environment from
239 -- affecting the install plan.
241 -- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
242 , "--package-env=non-existent-package-env"
244 -- --lib allows solving for packages with libraries or
252 -- The test doesn't currently handle stdout, so we suppress it
253 -- with silent. nowrap simplifies parsing the errors messages.
260 cmd
= (shell
(timeout
++ " " ++ cabalCmd
))
261 { std_err
= CreatePipe
263 , cwd
= Just cabalDir
266 -- TODO: Read stdout and compare the install plans.
267 (_
, _
, Just errh
, ph
) <- createProcess cmd
268 err
<- BS
.hGetContents errh
269 (, err
) <$> waitForProcess ph
272 "After searching the rest of the dependency tree exhaustively"
274 | exitCode
== ExitSuccess
= Solution
275 | exitCode
== ExitFailure
124 = Timeout
276 | fromString exhaustiveMsg `BS
.isInfixOf` err
= NoInstallPlan
277 | fromString
"Backjump limit reached" `BS
.isInfixOf` err
= BackjumpLimit
278 | fromString
"none of the components are available to build" `BS
.isInfixOf` err
= Unbuildable
279 | fromString
"Dependency on unbuildable" `BS
.isInfixOf` err
= UnbuildableDep
280 | fromString
"Dependency cycle between the following components" `BS
.isInfixOf` err
= ComponentCycle
281 | fromString
"Problem with module re-exports" `BS
.isInfixOf` err
= ModReexpIssue
282 | fromString
"There is no package named" `BS
.isInfixOf` err
= PkgNotFound
283 |
otherwise = Unknown
284 return (CabalTrial time result
)
286 runCabalCmdWithEnv cabalDir thisEnv args
= do
287 (ec
, uout
, uerr
) <- readCreateProcessWithExitCode
(proc cabal args
)
288 { cwd
= Just cabalDir
292 unless (ec
== ExitSuccess
) $ do
297 isSampleLargeEnough
:: PValue
Double -> Int -> Bool
298 isSampleLargeEnough pvalue trials
=
299 -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
300 -- used when both sample sizes are less than or equal to 20.
301 trials
> 20 ||
isJust (mannWhitneyUCriticalValue
(trials
, trials
) pvalue
)
303 isSignificantTimeDifference
:: PValue
Double -> [NominalDiffTime
] -> [NominalDiffTime
] -> Bool
304 isSignificantTimeDifference pvalue xs ys
=
305 let toVector
= V
.fromList
. map diffTimeToDouble
306 in case mannWhitneyUtest SamplesDiffer pvalue
(toVector xs
) (toVector ys
) of
307 Nothing
-> error "not enough data for mannWhitneyUtest"
308 Just Significant
-> True
309 Just NotSignificant
-> False
311 -- Should we stop after the first trial of this package to save time? This
312 -- function skips the package if the results are uninteresting and the times are
313 -- within --min-run-time-percentage-difference-to-rerun.
314 shouldContinueAfterFirstTrial
:: Double
320 shouldContinueAfterFirstTrial
0 _ _ _ _
= True
321 shouldContinueAfterFirstTrial _ _ _ Timeout Timeout
= False
322 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2
=
323 isSignificantResult r1 r2
324 ||
abs (t1
- t2
) / min t1 t2
>= realToFrac (maxRunTimeDifferenceToIgnore
/ 100)
326 isSignificantResult
:: CabalResult
-> CabalResult
-> Bool
327 isSignificantResult r1 r2
= r1
/= r2 ||
not (isExpectedResult r1
)
329 -- Is this result expected in a benchmark run on all of Hackage?
330 isExpectedResult
:: CabalResult
-> Bool
331 isExpectedResult Solution
= True
332 isExpectedResult NoInstallPlan
= True
333 isExpectedResult BackjumpLimit
= True
334 isExpectedResult Timeout
= True
335 isExpectedResult Unbuildable
= True
336 isExpectedResult UnbuildableDep
= True
337 isExpectedResult ComponentCycle
= True
338 isExpectedResult ModReexpIssue
= True
339 isExpectedResult PkgNotFound
= False
340 isExpectedResult Unknown
= False
342 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
343 -- should be the same. If they aren't the same, we returns Unknown.
344 combineTrialResults
:: [CabalResult
] -> CabalResult
345 combineTrialResults rs
348 | allEqual
[r | r
<- rs
, r
/= Timeout
] = Timeout
349 |
otherwise = Unknown
351 allEqual
:: Eq a
=> [a
] -> Bool
352 allEqual xs
= length (nub xs
) == 1
354 timeEvent
:: IO a
-> IO (a
, NominalDiffTime
)
356 start
<- getCurrentTime
358 end
<- getCurrentTime
359 return (r
, diffUTCTime end start
)
361 diffTimeToDouble
:: NominalDiffTime
-> Double
362 diffTimeToDouble
= fromRational . toRational
364 parserInfo
:: ParserInfo Args
365 parserInfo
= info
(argParser
<**> helper
)
367 <> progDesc
("Find differences between two cabal commands when solving"
368 ++ " for all packages on Hackage.")
369 <> header
"hackage-benchmark" )
371 argParser
:: Parser Args
376 <> help
"First cabal executable")
380 <> help
"Second cabal executable")
381 <*> option
(words <$> str
)
382 ( long
"cabal1-flags"
385 <> help
"Extra flags for the first cabal executable")
386 <*> option
(words <$> str
)
387 ( long
"cabal2-flags"
390 <> help
"Extra flags for the second cabal executable")
391 <*> option
(map mkPackageName
. words <$> str
)
394 <> metavar
"PACKAGES"
395 <> help
("Space separated list of packages to test, or all of Hackage"
396 ++ " if unspecified"))
398 ( long
"min-run-time-percentage-difference-to-rerun"
401 <> metavar
"PERCENTAGE"
402 <> help
("Stop testing a package when the difference in run times in"
403 ++ " the first trial are within this percentage, in order to"
405 <*> option
(mkPValue
<$> auto
)
408 <> value (mkPValue
0.05)
410 <> help
("p-value used to determine whether to print the results for"
417 <> help
"Number of trials for each package")
419 ( long
"concurrently"
420 <> help
"Run cabals concurrently")
422 ( long
"print-trials"
423 <> help
"Whether to include the results from individual trials in the output")
425 ( long
"print-skipped-packages"
426 <> help
"Whether to include skipped packages in the output")
432 <> help
"Maximum time to run a cabal command, in seconds")