solver-benchmarks/HackageBenchmark.hs

   1 {-# LANGUAGE RecordWildCards #-}
   2 {-# LANGUAGE TupleSections #-}
   3 {-# LANGUAGE ScopedTypeVariables #-}
   4
   5 module HackageBenchmark (
   6     hackageBenchmarkMain
   7
   8 -- Exposed for testing:
   9   , CabalResult(..)
  10   , isSignificantTimeDifference
  11   , combineTrialResults
  12   , isSignificantResult
  13   , shouldContinueAfterFirstTrial
  14   ) where
  15
  16 import Control.Concurrent.Async (concurrently)
  17 import Control.Monad (forM, replicateM, unless, when)
  18 import qualified Data.ByteString as BS
  19 import Data.List (nub, unzip4)
  20 import Data.Maybe (isJust, catMaybes)
  21 import Data.String (fromString)
  22 import Data.Function ((&))
  23 import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
  24 import qualified Data.Vector.Unboxed as V
  25 import Options.Applicative
  26 import Statistics.Sample (mean, stdDev, geometricMean)
  27 import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
  28                                     , mannWhitneyUCriticalValue
  29                                     , mannWhitneyUtest)
  30 import Statistics.Types (PValue, mkPValue)
  31 import System.Directory (getTemporaryDirectory, createDirectoryIfMissing)
  32 import System.Environment (getEnvironment)
  33 import System.Exit (ExitCode(..), exitWith, exitFailure)
  34 import System.FilePath ((</>))
  35 import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
  36                  , stdout)
  37 import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
  38                       , createProcess, readProcess, shell, waitForProcess, proc, readCreateProcessWithExitCode )
  39 import Text.Printf (printf)
  40
  41 import qualified Data.Map.Strict as Map
  42
  43 import Distribution.Package (PackageName, mkPackageName, unPackageName)
  44
  45 data Args = Args {
  46     argCabal1                      :: FilePath
  47   , argCabal2                      :: FilePath
  48   , argCabal1Flags                 :: [String]
  49   , argCabal2Flags                 :: [String]
  50   , argPackages                    :: [PackageName]
  51   , argMinRunTimeDifferenceToRerun :: Double
  52   , argPValue                      :: PValue Double
  53   , argTrials                      :: Int
  54   , argConcurrently                :: Bool
  55   , argPrintTrials                 :: Bool
  56   , argPrintSkippedPackages        :: Bool
  57   , argTimeoutSeconds              :: Int
  58   }
  59
  60 data CabalTrial = CabalTrial NominalDiffTime CabalResult
  61
  62 data CabalResult
  63   = Solution
  64   | NoInstallPlan
  65   | BackjumpLimit
  66   | Unbuildable
  67   | UnbuildableDep
  68   | ComponentCycle
  69   | ModReexpIssue
  70   | PkgNotFound
  71   | Timeout
  72   | Unknown
  73   deriving (Eq, Show)
  74
  75 hackageBenchmarkMain :: IO ()
  76 hackageBenchmarkMain = do
  77   hSetBuffering stdout LineBuffering
  78   args@Args {..} <- execParser parserInfo
  79   checkArgs args
  80   printConfig args
  81   pkgs <- getPackages args
  82   putStrLn ""
  83
  84   let concurrently' :: IO a -> IO b -> IO (a, b)
  85       concurrently' | argConcurrently = concurrently
  86                     | otherwise       = \ma mb -> do { a <- ma; b <- mb; return (a, b) }
  87
  88   let -- The maximum length of the heading and package names.
  89       nameColumnWidth :: Int
  90       nameColumnWidth =
  91           maximum $ map length $ "package" : map unPackageName pkgs
  92
  93   -- create cabal runners
  94   runCabal1 <- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
  95   runCabal2 <- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
  96
  97   -- When the output contains both trails and summaries, label each row as
  98   -- "trial" or "summary".
  99   when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
 100   putStrLn $
 101       printf "%-*s %-14s %-14s %11s %11s %11s %11s %11s"
 102              nameColumnWidth "package" "result1" "result2"
 103              "mean1" "mean2" "stddev1" "stddev2" "speedup"
 104
 105   speedups :: [Double] <- fmap catMaybes $ forM pkgs $ \pkg -> do
 106     let printTrial msgType result1 result2 time1 time2 =
 107             putStrLn $
 108             printf "%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
 109                    msgType nameColumnWidth (unPackageName pkg)
 110                    (show result1) (show result2)
 111                    (diffTimeToDouble time1) (diffTimeToDouble time2)
 112
 113     (CabalTrial t1 r1, CabalTrial t2 r2) <- runCabal1 pkg `concurrently'` runCabal2 pkg
 114
 115     if not $
 116        shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
 117     then do
 118       when argPrintSkippedPackages $
 119          if argPrintTrials
 120          then printTrial "trial (skipping)" r1 r2 t1 t2
 121          else putStrLn $ printf "%-*s (first run times were too similar)"
 122                                 nameColumnWidth (unPackageName pkg)
 123       return Nothing
 124     else do
 125       when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
 126       (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
 127                             . replicateM (argTrials - 1) $ do
 128
 129         (CabalTrial t1' r1', CabalTrial t2' r2') <- runCabal1 pkg `concurrently'` runCabal2 pkg
 130         when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
 131         return (t1', t2', r1', r2')
 132
 133       let result1 = combineTrialResults rs1
 134           result2 = combineTrialResults rs2
 135           times1 = V.fromList (map diffTimeToDouble ts1)
 136           times2 = V.fromList (map diffTimeToDouble ts2)
 137           mean1 = mean times1
 138           mean2 = mean times2
 139           stddev1 = stdDev times1
 140           stddev2 = stdDev times2
 141           speedup = mean1 / mean2
 142
 143       when argPrintTrials $ putStr $ printf "%-16s " "summary"
 144       if isSignificantResult result1 result2
 145           || isSignificantTimeDifference argPValue ts1 ts2
 146       then putStrLn $
 147            printf "%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
 148                   nameColumnWidth (unPackageName pkg)
 149                   (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
 150       else when (argPrintTrials || argPrintSkippedPackages) $
 151            putStrLn $
 152            printf "%-*s (not significant, speedup = %10.3f)" nameColumnWidth (unPackageName pkg) speedup
 153
 154       -- return speedup value
 155       return (Just speedup)
 156
 157   -- finally, calculate the geometric mean of speedups
 158   printf "Geometric mean of %d packages' speedups is %10.3f\n" (length speedups) (geometricMean (V.fromList speedups))
 159
 160   where
 161     checkArgs :: Args -> IO ()
 162     checkArgs Args {..} = do
 163       let die msg = hPutStrLn stderr msg >> exitFailure
 164       unless (argTrials > 0) $ die "--trials must be greater than 0."
 165       unless (argMinRunTimeDifferenceToRerun >= 0) $
 166           die "--min-run-time-percentage-difference-to-rerun must be non-negative."
 167       unless (isSampleLargeEnough argPValue argTrials) $
 168           die "p-value is too small for the number of trials."
 169
 170     printConfig :: Args -> IO ()
 171     printConfig Args {..} = do
 172       putStrLn "Comparing:"
 173       putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
 174       callProcess argCabal1 ["--version"]
 175       putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
 176       callProcess argCabal2 ["--version"]
 177       -- TODO: Print index state.
 178       putStrLn "Base package database:"
 179       callProcess "ghc-pkg" ["list"]
 180
 181     getPackages :: Args -> IO [PackageName]
 182     getPackages Args {..} = do
 183       pkgs <-
 184           if null argPackages
 185           then do
 186             putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
 187             list <- readProcess argCabal1 ["list", "--simple-output"] ""
 188             return $ nub [mkPackageName n | n : _ <- words <$> lines list]
 189           else do
 190             putStrLn "Using given package list ..."
 191             return argPackages
 192       putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
 193       return pkgs
 194
 195 data CabalUnderTest = CabalUnderTest1 | CabalUnderTest2
 196
 197 runCabal
 198     :: Int             -- ^ timeout in seconds
 199     -> CabalUnderTest  -- ^ cabal under test
 200     -> FilePath        -- ^ cabal
 201     -> [String]        -- ^ flags
 202     -> IO (PackageName  -> IO CabalTrial)  -- ^ testing function.
 203 runCabal timeoutSeconds cabalUnderTest cabal flags = do
 204   tmpDir <- getTemporaryDirectory
 205
 206   -- cabal directory for this cabal under test
 207   let cabalDir = tmpDir </> "solver-benchmarks-workdir" </> case cabalUnderTest of
 208           CabalUnderTest1 -> "cabal1"
 209           CabalUnderTest2 -> "cabal2"
 210
 211   putStrLn $ "Cabal directory (for " ++ cabal ++ ") " ++ cabalDir
 212   createDirectoryIfMissing True cabalDir
 213
 214   -- shell environment
 215   currEnv <- Map.fromList <$>  getEnvironment
 216   let thisEnv :: [(String, String)]
 217       thisEnv = Map.toList $ currEnv
 218           & Map.insert "CABAL_CONFIG" (cabalDir </> "config")
 219           & Map.insert "CABAL_DIR"     cabalDir
 220
 221   -- Initialize the config file, whether or not it already exists
 222   runCabalCmdWithEnv cabalDir thisEnv ["user-config", "init", "--force"]
 223
 224   -- Run cabal update
 225   putStrLn $ "Running cabal update (using " ++ cabal ++ ") ..."
 226   runCabalCmdWithEnv cabalDir thisEnv ["update"]
 227
 228   -- return an actual runner
 229   return $ \pkg -> do
 230     ((exitCode, err), time) <- timeEvent $ do
 231
 232       let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
 233           cabalCmd = unwords $
 234               [ cabal
 235
 236               , "install"
 237
 238                 -- These flags prevent a Cabal project or package environment from
 239                 -- affecting the install plan.
 240                 --
 241                 -- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
 242               , "--package-env=non-existent-package-env"
 243
 244                 -- --lib allows solving for packages with libraries or
 245                 -- executables.
 246               , "--lib"
 247
 248               , unPackageName pkg
 249
 250               , "--dry-run"
 251
 252                 -- The test doesn't currently handle stdout, so we suppress it
 253                 -- with silent. nowrap simplifies parsing the errors messages.
 254               , "-vsilent+nowrap"
 255
 256               ]
 257
 258                ++ flags
 259
 260           cmd = (shell (timeout ++ " " ++ cabalCmd))
 261               { std_err = CreatePipe
 262               , env = Just thisEnv
 263               , cwd = Just cabalDir
 264               }
 265
 266       -- TODO: Read stdout and compare the install plans.
 267       (_, _, Just errh, ph) <- createProcess cmd
 268       err <- BS.hGetContents errh
 269       (, err) <$> waitForProcess ph
 270
 271     let exhaustiveMsg =
 272             "After searching the rest of the dependency tree exhaustively"
 273         result
 274           | exitCode == ExitSuccess                                                          = Solution
 275           | exitCode == ExitFailure 124                                                      = Timeout
 276           | fromString exhaustiveMsg `BS.isInfixOf` err                                       = NoInstallPlan
 277           | fromString "Backjump limit reached" `BS.isInfixOf` err                            = BackjumpLimit
 278           | fromString "none of the components are available to build" `BS.isInfixOf` err     = Unbuildable
 279           | fromString "Dependency on unbuildable" `BS.isInfixOf` err                         = UnbuildableDep
 280           | fromString "Dependency cycle between the following components" `BS.isInfixOf` err = ComponentCycle
 281           | fromString "Problem with module re-exports" `BS.isInfixOf` err                    = ModReexpIssue
 282           | fromString "There is no package named" `BS.isInfixOf` err                         = PkgNotFound
 283           | otherwise                                                                        = Unknown
 284     return (CabalTrial time result)
 285   where
 286     runCabalCmdWithEnv cabalDir thisEnv args = do
 287       (ec, uout, uerr) <- readCreateProcessWithExitCode (proc cabal args)
 288           { cwd = Just cabalDir
 289           , env = Just thisEnv
 290           }
 291           ""
 292       unless (ec == ExitSuccess) $ do
 293           putStrLn uout
 294           putStrLn uerr
 295           exitWith ec
 296
 297 isSampleLargeEnough :: PValue Double -> Int -> Bool
 298 isSampleLargeEnough pvalue trials =
 299     -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
 300     -- used when both sample sizes are less than or equal to 20.
 301     trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
 302
 303 isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
 304 isSignificantTimeDifference pvalue xs ys =
 305   let toVector = V.fromList . map diffTimeToDouble
 306   in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
 307        Nothing             -> error "not enough data for mannWhitneyUtest"
 308        Just Significant    -> True
 309        Just NotSignificant -> False
 310
 311 -- Should we stop after the first trial of this package to save time? This
 312 -- function skips the package if the results are uninteresting and the times are
 313 -- within --min-run-time-percentage-difference-to-rerun.
 314 shouldContinueAfterFirstTrial :: Double
 315                               -> NominalDiffTime
 316                               -> NominalDiffTime
 317                               -> CabalResult
 318                               -> CabalResult
 319                               -> Bool
 320 shouldContinueAfterFirstTrial 0                            _  _  _       _       = True
 321 shouldContinueAfterFirstTrial _                            _  _  Timeout Timeout = False
 322 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1      r2      =
 323     isSignificantResult r1 r2
 324  || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
 325
 326 isSignificantResult :: CabalResult -> CabalResult -> Bool
 327 isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
 328
 329 -- Is this result expected in a benchmark run on all of Hackage?
 330 isExpectedResult :: CabalResult -> Bool
 331 isExpectedResult Solution       = True
 332 isExpectedResult NoInstallPlan  = True
 333 isExpectedResult BackjumpLimit  = True
 334 isExpectedResult Timeout        = True
 335 isExpectedResult Unbuildable    = True
 336 isExpectedResult UnbuildableDep = True
 337 isExpectedResult ComponentCycle = True
 338 isExpectedResult ModReexpIssue  = True
 339 isExpectedResult PkgNotFound    = False
 340 isExpectedResult Unknown        = False
 341
 342 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
 343 -- should be the same. If they aren't the same, we returns Unknown.
 344 combineTrialResults :: [CabalResult] -> CabalResult
 345 combineTrialResults rs
 346   | r:_ <- rs
 347   , allEqual rs                          = r
 348   | allEqual [r | r <- rs, r /= Timeout] = Timeout
 349   | otherwise                            = Unknown
 350   where
 351     allEqual :: Eq a => [a] -> Bool
 352     allEqual xs = length (nub xs) == 1
 353
 354 timeEvent :: IO a -> IO (a, NominalDiffTime)
 355 timeEvent task = do
 356   start <- getCurrentTime
 357   r <- task
 358   end <- getCurrentTime
 359   return (r, diffUTCTime end start)
 360
 361 diffTimeToDouble :: NominalDiffTime -> Double
 362 diffTimeToDouble = fromRational . toRational
 363
 364 parserInfo :: ParserInfo Args
 365 parserInfo = info (argParser <**> helper)
 366      ( fullDesc
 367     <> progDesc ("Find differences between two cabal commands when solving"
 368                    ++ " for all packages on Hackage.")
 369     <> header "hackage-benchmark" )
 370
 371 argParser :: Parser Args
 372 argParser = Args
 373     <$> strOption
 374          ( long "cabal1"
 375         <> metavar "PATH"
 376         <> help "First cabal executable")
 377     <*> strOption
 378          ( long "cabal2"
 379         <> metavar "PATH"
 380         <> help "Second cabal executable")
 381     <*> option (words <$> str)
 382          ( long "cabal1-flags"
 383         <> value []
 384         <> metavar "FLAGS"
 385         <> help "Extra flags for the first cabal executable")
 386     <*> option (words <$> str)
 387          ( long "cabal2-flags"
 388         <> value []
 389         <> metavar "FLAGS"
 390         <> help "Extra flags for the second cabal executable")
 391     <*> option (map mkPackageName . words <$> str)
 392          ( long "packages"
 393         <> value []
 394         <> metavar "PACKAGES"
 395         <> help ("Space separated list of packages to test, or all of Hackage"
 396                    ++ " if unspecified"))
 397     <*> option auto
 398          ( long "min-run-time-percentage-difference-to-rerun"
 399         <> showDefault
 400         <> value 0.0
 401         <> metavar "PERCENTAGE"
 402         <> help ("Stop testing a package when the difference in run times in"
 403                    ++ " the first trial are within this percentage, in order to"
 404                    ++ " save time"))
 405     <*> option (mkPValue <$> auto)
 406          ( long "pvalue"
 407         <> showDefault
 408         <> value (mkPValue 0.05)
 409         <> metavar "DOUBLE"
 410         <> help ("p-value used to determine whether to print the results for"
 411                    ++ " each package"))
 412     <*> option auto
 413          ( long "trials"
 414         <> showDefault
 415         <> value 10
 416         <> metavar "N"
 417         <> help "Number of trials for each package")
 418     <*> switch
 419          ( long "concurrently"
 420         <> help "Run cabals concurrently")
 421     <*> switch
 422          ( long "print-trials"
 423         <> help "Whether to include the results from individual trials in the output")
 424     <*> switch
 425          ( long "print-skipped-packages"
 426         <> help "Whether to include skipped packages in the output")
 427     <*> option auto
 428          ( long "timeout"
 429         <> showDefault
 430         <> value 90
 431         <> metavar "SECONDS"
 432         <> help "Maximum time to run a cabal command, in seconds")