solver-benchmarks/HackageBenchmark.hs

   1 {-# OPTIONS_GHC -fno-warn-orphans #-}
   2 {-# LANGUAGE RecordWildCards #-}
   3 {-# LANGUAGE TupleSections #-}
   4 {-# LANGUAGE ScopedTypeVariables #-}
   5
   6 module HackageBenchmark (
   7     hackageBenchmarkMain
   8
   9 -- Exposed for testing:
  10   , CabalResult(..)
  11   , isSignificantTimeDifference
  12   , combineTrialResults
  13   , isSignificantResult
  14   , shouldContinueAfterFirstTrial
  15   ) where
  16
  17 import Control.Concurrent.Async (concurrently)
  18 import Control.Monad (forM, replicateM, unless, when)
  19 import qualified Data.ByteString as BS
  20 import Data.List (nub, unzip4)
  21 import Data.Maybe (isJust, catMaybes)
  22 import Data.Monoid ((<>))
  23 import Data.String (fromString)
  24 import Data.Function ((&))
  25 import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
  26 import qualified Data.Vector.Unboxed as V
  27 import Options.Applicative
  28 import Statistics.Sample (mean, stdDev, geometricMean)
  29 import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
  30                                     , mannWhitneyUCriticalValue
  31                                     , mannWhitneyUtest)
  32 import Statistics.Types (PValue, mkPValue)
  33 import System.Directory (getTemporaryDirectory, createDirectoryIfMissing)
  34 import System.Environment (getEnvironment)
  35 import System.Exit (ExitCode(..), exitWith, exitFailure)
  36 import System.FilePath ((</>))
  37 import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
  38                  , stdout)
  39 import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
  40                       , createProcess, readProcess, shell, waitForProcess, proc, readCreateProcessWithExitCode )
  41 import Text.Printf (printf)
  42
  43 import qualified Data.Map.Strict as Map
  44
  45 import Distribution.Package (PackageName, mkPackageName, unPackageName)
  46
  47 data Args = Args {
  48     argCabal1                      :: FilePath
  49   , argCabal2                      :: FilePath
  50   , argCabal1Flags                 :: [String]
  51   , argCabal2Flags                 :: [String]
  52   , argPackages                    :: [PackageName]
  53   , argMinRunTimeDifferenceToRerun :: Double
  54   , argPValue                      :: PValue Double
  55   , argTrials                      :: Int
  56   , argConcurrently                :: Bool
  57   , argPrintTrials                 :: Bool
  58   , argPrintSkippedPackages        :: Bool
  59   , argTimeoutSeconds              :: Int
  60   }
  61
  62 data CabalTrial = CabalTrial NominalDiffTime CabalResult
  63
  64 data CabalResult
  65   = Solution
  66   | NoInstallPlan
  67   | BackjumpLimit
  68   | Unbuildable
  69   | UnbuildableDep
  70   | ComponentCycle
  71   | ModReexpIssue
  72   | PkgNotFound
  73   | Timeout
  74   | Unknown
  75   deriving (Eq, Show)
  76
  77 hackageBenchmarkMain :: IO ()
  78 hackageBenchmarkMain = do
  79   hSetBuffering stdout LineBuffering
  80   args@Args {..} <- execParser parserInfo
  81   checkArgs args
  82   printConfig args
  83   pkgs <- getPackages args
  84   putStrLn ""
  85
  86   let concurrently' :: IO a -> IO b -> IO (a, b)
  87       concurrently' | argConcurrently = concurrently
  88                     | otherwise       = \ma mb -> do { a <- ma; b <- mb; return (a, b) }
  89
  90   let -- The maximum length of the heading and package names.
  91       nameColumnWidth :: Int
  92       nameColumnWidth =
  93           maximum $ map length $ "package" : map unPackageName pkgs
  94
  95   -- create cabal runners
  96   runCabal1 <- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
  97   runCabal2 <- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
  98
  99   -- When the output contains both trails and summaries, label each row as
 100   -- "trial" or "summary".
 101   when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
 102   putStrLn $
 103       printf "%-*s %-14s %-14s %11s %11s %11s %11s %11s"
 104              nameColumnWidth "package" "result1" "result2"
 105              "mean1" "mean2" "stddev1" "stddev2" "speedup"
 106
 107   speedups :: [Double] <- fmap catMaybes $ forM pkgs $ \pkg -> do
 108     let printTrial msgType result1 result2 time1 time2 =
 109             putStrLn $
 110             printf "%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
 111                    msgType nameColumnWidth (unPackageName pkg)
 112                    (show result1) (show result2)
 113                    (diffTimeToDouble time1) (diffTimeToDouble time2)
 114
 115     (CabalTrial t1 r1, CabalTrial t2 r2) <- runCabal1 pkg `concurrently'` runCabal2 pkg
 116
 117     if not $
 118        shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
 119     then do
 120       when argPrintSkippedPackages $
 121          if argPrintTrials
 122          then printTrial "trial (skipping)" r1 r2 t1 t2
 123          else putStrLn $ printf "%-*s (first run times were too similar)"
 124                                 nameColumnWidth (unPackageName pkg)
 125       return Nothing
 126     else do
 127       when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
 128       (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
 129                             . replicateM (argTrials - 1) $ do
 130
 131         (CabalTrial t1' r1', CabalTrial t2' r2') <- runCabal1 pkg `concurrently'` runCabal2 pkg
 132         when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
 133         return (t1', t2', r1', r2')
 134
 135       let result1 = combineTrialResults rs1
 136           result2 = combineTrialResults rs2
 137           times1 = V.fromList (map diffTimeToDouble ts1)
 138           times2 = V.fromList (map diffTimeToDouble ts2)
 139           mean1 = mean times1
 140           mean2 = mean times2
 141           stddev1 = stdDev times1
 142           stddev2 = stdDev times2
 143           speedup = mean1 / mean2
 144
 145       when argPrintTrials $ putStr $ printf "%-16s " "summary"
 146       if isSignificantResult result1 result2
 147           || isSignificantTimeDifference argPValue ts1 ts2
 148       then putStrLn $
 149            printf "%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
 150                   nameColumnWidth (unPackageName pkg)
 151                   (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
 152       else when (argPrintTrials || argPrintSkippedPackages) $
 153            putStrLn $
 154            printf "%-*s (not significant, speedup = %10.3f)" nameColumnWidth (unPackageName pkg) speedup
 155
 156       -- return speedup value
 157       return (Just speedup)
 158
 159   -- finally, calculate the geometric mean of speedups
 160   printf "Geometric mean of %d packages' speedups is %10.3f\n" (length speedups) (geometricMean (V.fromList speedups))
 161
 162   where
 163     checkArgs :: Args -> IO ()
 164     checkArgs Args {..} = do
 165       let die msg = hPutStrLn stderr msg >> exitFailure
 166       unless (argTrials > 0) $ die "--trials must be greater than 0."
 167       unless (argMinRunTimeDifferenceToRerun >= 0) $
 168           die "--min-run-time-percentage-difference-to-rerun must be non-negative."
 169       unless (isSampleLargeEnough argPValue argTrials) $
 170           die "p-value is too small for the number of trials."
 171
 172     printConfig :: Args -> IO ()
 173     printConfig Args {..} = do
 174       putStrLn "Comparing:"
 175       putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
 176       callProcess argCabal1 ["--version"]
 177       putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
 178       callProcess argCabal2 ["--version"]
 179       -- TODO: Print index state.
 180       putStrLn "Base package database:"
 181       callProcess "ghc-pkg" ["list"]
 182
 183     getPackages :: Args -> IO [PackageName]
 184     getPackages Args {..} = do
 185       pkgs <-
 186           if null argPackages
 187           then do
 188             putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
 189             list <- readProcess argCabal1 ["list", "--simple-output"] ""
 190             return $ nub [mkPackageName $ head (words line) | line <- lines list]
 191           else do
 192             putStrLn "Using given package list ..."
 193             return argPackages
 194       putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
 195       return pkgs
 196
 197 data CabalUnderTest = CabalUnderTest1 | CabalUnderTest2
 198
 199 runCabal
 200     :: Int             -- ^ timeout in seconds
 201     -> CabalUnderTest  -- ^ cabal under test
 202     -> FilePath        -- ^ cabal
 203     -> [String]        -- ^ flags
 204     -> IO (PackageName  -> IO CabalTrial)  -- ^ testing function.
 205 runCabal timeoutSeconds cabalUnderTest cabal flags = do
 206   tmpDir <- getTemporaryDirectory
 207
 208   -- cabal directory for this cabal under test
 209   let cabalDir = tmpDir </> "solver-benchmarks-workdir" </> case cabalUnderTest of
 210           CabalUnderTest1 -> "cabal1"
 211           CabalUnderTest2 -> "cabal2"
 212
 213   putStrLn $ "Cabal directory (for " ++ cabal ++ ") " ++ cabalDir
 214   createDirectoryIfMissing True cabalDir
 215
 216   -- shell environment
 217   currEnv <- Map.fromList <$>  getEnvironment
 218   let thisEnv :: [(String, String)]
 219       thisEnv = Map.toList $ currEnv
 220           & Map.insert "CABAL_CONFIG" (cabalDir </> "config")
 221           & Map.insert "CABAL_DIR"     cabalDir
 222
 223   -- Initialize the config file, whether or not it already exists
 224   runCabalCmdWithEnv cabalDir thisEnv ["user-config", "init", "--force"]
 225
 226   -- Run cabal update
 227   putStrLn $ "Running cabal update (using " ++ cabal ++ ") ..."
 228   runCabalCmdWithEnv cabalDir thisEnv ["update"]
 229
 230   -- return an actual runner
 231   return $ \pkg -> do
 232     ((exitCode, err), time) <- timeEvent $ do
 233
 234       let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
 235           cabalCmd = unwords $
 236               [ cabal
 237
 238               , "install"
 239
 240                 -- These flags prevent a Cabal project or package environment from
 241                 -- affecting the install plan.
 242                 --
 243                 -- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
 244               , "--package-env=non-existent-package-env"
 245
 246                 -- --lib allows solving for packages with libraries or
 247                 -- executables.
 248               , "--lib"
 249
 250               , unPackageName pkg
 251
 252               , "--dry-run"
 253
 254                 -- The test doesn't currently handle stdout, so we suppress it
 255                 -- with silent. nowrap simplifies parsing the errors messages.
 256               , "-vsilent+nowrap"
 257
 258               ]
 259
 260                ++ flags
 261
 262           cmd = (shell (timeout ++ " " ++ cabalCmd))
 263               { std_err = CreatePipe
 264               , env = Just thisEnv
 265               , cwd = Just cabalDir
 266               }
 267
 268       -- TODO: Read stdout and compare the install plans.
 269       (_, _, Just errh, ph) <- createProcess cmd
 270       err <- BS.hGetContents errh
 271       (, err) <$> waitForProcess ph
 272
 273     let exhaustiveMsg =
 274             "After searching the rest of the dependency tree exhaustively"
 275         result
 276           | exitCode == ExitSuccess                                                          = Solution
 277           | exitCode == ExitFailure 124                                                      = Timeout
 278           | fromString exhaustiveMsg `BS.isInfixOf` err                                       = NoInstallPlan
 279           | fromString "Backjump limit reached" `BS.isInfixOf` err                            = BackjumpLimit
 280           | fromString "none of the components are available to build" `BS.isInfixOf` err     = Unbuildable
 281           | fromString "Dependency on unbuildable" `BS.isInfixOf` err                         = UnbuildableDep
 282           | fromString "Dependency cycle between the following components" `BS.isInfixOf` err = ComponentCycle
 283           | fromString "Problem with module re-exports" `BS.isInfixOf` err                    = ModReexpIssue
 284           | fromString "There is no package named" `BS.isInfixOf` err                         = PkgNotFound
 285           | otherwise                                                                        = Unknown
 286     return (CabalTrial time result)
 287   where
 288     runCabalCmdWithEnv cabalDir thisEnv args = do
 289       (ec, uout, uerr) <- readCreateProcessWithExitCode (proc cabal args)
 290           { cwd = Just cabalDir
 291           , env = Just thisEnv
 292           }
 293           ""
 294       unless (ec == ExitSuccess) $ do
 295           putStrLn uout
 296           putStrLn uerr
 297           exitWith ec
 298
 299 isSampleLargeEnough :: PValue Double -> Int -> Bool
 300 isSampleLargeEnough pvalue trials =
 301     -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
 302     -- used when both sample sizes are less than or equal to 20.
 303     trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
 304
 305 isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
 306 isSignificantTimeDifference pvalue xs ys =
 307   let toVector = V.fromList . map diffTimeToDouble
 308   in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
 309        Nothing             -> error "not enough data for mannWhitneyUtest"
 310        Just Significant    -> True
 311        Just NotSignificant -> False
 312
 313 -- Should we stop after the first trial of this package to save time? This
 314 -- function skips the package if the results are uninteresting and the times are
 315 -- within --min-run-time-percentage-difference-to-rerun.
 316 shouldContinueAfterFirstTrial :: Double
 317                               -> NominalDiffTime
 318                               -> NominalDiffTime
 319                               -> CabalResult
 320                               -> CabalResult
 321                               -> Bool
 322 shouldContinueAfterFirstTrial 0                            _  _  _       _       = True
 323 shouldContinueAfterFirstTrial _                            _  _  Timeout Timeout = False
 324 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1      r2      =
 325     isSignificantResult r1 r2
 326  || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
 327
 328 isSignificantResult :: CabalResult -> CabalResult -> Bool
 329 isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
 330
 331 -- Is this result expected in a benchmark run on all of Hackage?
 332 isExpectedResult :: CabalResult -> Bool
 333 isExpectedResult Solution       = True
 334 isExpectedResult NoInstallPlan  = True
 335 isExpectedResult BackjumpLimit  = True
 336 isExpectedResult Timeout        = True
 337 isExpectedResult Unbuildable    = True
 338 isExpectedResult UnbuildableDep = True
 339 isExpectedResult ComponentCycle = True
 340 isExpectedResult ModReexpIssue  = True
 341 isExpectedResult PkgNotFound    = False
 342 isExpectedResult Unknown        = False
 343
 344 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
 345 -- should be the same. If they aren't the same, we returns Unknown.
 346 combineTrialResults :: [CabalResult] -> CabalResult
 347 combineTrialResults rs
 348   | allEqual rs                          = head rs
 349   | allEqual [r | r <- rs, r /= Timeout] = Timeout
 350   | otherwise                            = Unknown
 351   where
 352     allEqual :: Eq a => [a] -> Bool
 353     allEqual xs = length (nub xs) == 1
 354
 355 timeEvent :: IO a -> IO (a, NominalDiffTime)
 356 timeEvent task = do
 357   start <- getCurrentTime
 358   r <- task
 359   end <- getCurrentTime
 360   return (r, diffUTCTime end start)
 361
 362 diffTimeToDouble :: NominalDiffTime -> Double
 363 diffTimeToDouble = fromRational . toRational
 364
 365 parserInfo :: ParserInfo Args
 366 parserInfo = info (argParser <**> helper)
 367      ( fullDesc
 368     <> progDesc ("Find differences between two cabal commands when solving"
 369                    ++ " for all packages on Hackage.")
 370     <> header "hackage-benchmark" )
 371
 372 argParser :: Parser Args
 373 argParser = Args
 374     <$> strOption
 375          ( long "cabal1"
 376         <> metavar "PATH"
 377         <> help "First cabal executable")
 378     <*> strOption
 379          ( long "cabal2"
 380         <> metavar "PATH"
 381         <> help "Second cabal executable")
 382     <*> option (words <$> str)
 383          ( long "cabal1-flags"
 384         <> value []
 385         <> metavar "FLAGS"
 386         <> help "Extra flags for the first cabal executable")
 387     <*> option (words <$> str)
 388          ( long "cabal2-flags"
 389         <> value []
 390         <> metavar "FLAGS"
 391         <> help "Extra flags for the second cabal executable")
 392     <*> option (map mkPackageName . words <$> str)
 393          ( long "packages"
 394         <> value []
 395         <> metavar "PACKAGES"
 396         <> help ("Space separated list of packages to test, or all of Hackage"
 397                    ++ " if unspecified"))
 398     <*> option auto
 399          ( long "min-run-time-percentage-difference-to-rerun"
 400         <> showDefault
 401         <> value 0.0
 402         <> metavar "PERCENTAGE"
 403         <> help ("Stop testing a package when the difference in run times in"
 404                    ++ " the first trial are within this percentage, in order to"
 405                    ++ " save time"))
 406     <*> option (mkPValue <$> auto)
 407          ( long "pvalue"
 408         <> showDefault
 409         <> value (mkPValue 0.05)
 410         <> metavar "DOUBLE"
 411         <> help ("p-value used to determine whether to print the results for"
 412                    ++ " each package"))
 413     <*> option auto
 414          ( long "trials"
 415         <> showDefault
 416         <> value 10
 417         <> metavar "N"
 418         <> help "Number of trials for each package")
 419     <*> switch
 420          ( long "concurrently"
 421         <> help "Run cabals concurrently")
 422     <*> switch
 423          ( long "print-trials"
 424         <> help "Whether to include the results from individual trials in the output")
 425     <*> switch
 426          ( long "print-skipped-packages"
 427         <> help "Whether to include skipped packages in the output")
 428     <*> option auto
 429          ( long "timeout"
 430         <> showDefault
 431         <> value 90
 432         <> metavar "SECONDS"
 433         <> help "Maximum time to run a cabal command, in seconds")