dev-support/hbasetests.sh

   1 #!/usr/bin/env bash
   2 ##
   3 # Licensed to the Apache Software Foundation (ASF) under one
   4 # or more contributor license agreements.  See the NOTICE file
   5 # distributed with this work for additional information
   6 # regarding copyright ownership.  The ASF licenses this file
   7 # to you under the Apache License, Version 2.0 (the
   8 # "License"); you may not use this file except in compliance
   9 # with the License.  You may obtain a copy of the License at
  10 #
  11 #     http://www.apache.org/licenses/LICENSE-2.0
  12 #
  13 # Unless required by applicable law or agreed to in writing, software
  14 # distributed under the License is distributed on an "AS IS" BASIS,
  15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16 # See the License for the specific language governing permissions and
  17 # limitations under the License.
  18 ##
  19 #
# This script:
#   - analyses the content of the .java test files to split them between
#         small/medium/large
#   - launches the small tests in a single maven instance, with surefire
#          parallelisation activated
#   - launches the medium & large tests in two maven instances, in parallel
#   - runs the flaky tests at the end, not parallelized
#   - presents a small report of the global results
#   - copies the failed test reports with prefix 'fail_' and a timestamp
#         to protect them from a later deletion by maven
#   - if configured to do so, relaunches the tests in error
  31 #
  32 #
# Caveats:
#   - multiple maven instances are launched, hence there can be recompilation
#  between the tests if a file is modified. For non flaky tests and
#  parallelization, the time frame is the time to execute the small tests,
#  so it's around 4 minutes.
#   - Note that surefire is buggy, and the results presented while
#  running may be wrong. For example, it can say that a test class
#  has 5 errors. When you look at the file it wrote, it says that the
#  2 tests are ok, and in the class there are actually two test
#  methods, not five. If you generate the report at the end with
#  surefire-report it's fine however.
  44 #
  45 ######################################### parameters
  46
  47 #mvn test -Dtest=org.apache.hadoop.hbase.regionserver.TestScanWithBloomError $*
  48
  49 #exit
  50
  51 #set to 0 to run only developpers tests (small & medium categories)
  52 runAllTests=0
  53
  54 #set to 1 to replay the failed tests. Previous reports are kept in
  55 # fail_ files
  56 replayFailed=0
  57
  58 #set to 0 to run all medium & large tests in a single maven operation
  59 # instead of two
  60 parallelMaven=1
  61
  62 #harcoded list of tests that often fail. We don't want to add any
  63 # complexity around then so there are not run in parallel but after
  64 # the others
  65 #The ',' at the end is mandatory
  66 flakyTests=
  67 #org.apache.hadoop.hbase.mapreduce.TestTableInputFormatScan,org.apache.hadoop.hbase.catalog.TestMetaTableAccessorNoCluster,org.apache.hadoop.hbase.catalog.TestMetaTableAccessor,org.apache.hadoop.hbase.mapreduce.TestHFileOutputFormat,org.apache.hadoop.hbase.mapred.TestTableMapReduce,org.apache.hadoop.hbase.coprocessor.TestMasterCoprocessorExceptionWithAbort,org.apache.hadoop.hbase.coprocessor.TestMasterCoprocessorExceptionWithRemove,org.apache.hadoop.hbase.client.TestAdmin,org.apache.hadoop.hbase.master.TestMasterFailover,org.apache.hadoop.hbase.regionserver.wal.TestLogRolling,org.apache.hadoop.hbase.master.TestDistributedLogSplitting,org.apache.hadoop.hbase.master.TestMasterRestartAfterDisablingTable,org.apache.hadoop.hbase.TestGlobalMemStoreSize,
  68
  69 ######################################### Internal parameters
  70 #directory used for surefire & the source code.
  71 #They should not need to be modified
  72 #The final / is mandatory
  73 rootTestClassDirectory="./src/test/java/"
  74 surefireReportDirectory="./target/surefire-reports/"
  75
  76 #variable to use to debug the script without launching the tests
  77 mvnCommand="mvn "
  78 #mvnCommand="echo $mvnCommand"
  79
  80 ######################################### Functions
  81 #get the list of the process considered as dead
  82 # i.e.: in the same group as the script and with a ppid of 1
  83 # We do this because surefire can leave some dead process, so
  84 # we will jstack them and kill them
  85 function createListDeadProcess {
  86   id=$$
  87   listDeadProcess=""
  88
  89   #list of the process with a ppid of 1
  90   sonProcess=`ps -o pid= --ppid 1`
  91
  92   #then the process with a pgid of the script
  93   for pId in $sonProcess
  94   do
  95     pgid=`ps -o pgid= --pid $pId | sed 's/ //g'`
  96     if [ "$pgid" == "$id" ]
  97     then
  98       listDeadProcess="$pId $listDeadProcess"
  99     fi
 100   done
 101 }
 102
 103 #kill the java sub process, if any, with a kill and a kill -9
 104 #When maven/surefire fails, it lefts some process with a ppid==1
 105 #we're going to find them with the pgid, print the stack and kill them.
 106 function cleanProcess {
 107   id=$$
 108
 109   createListDeadProcess
 110   for pId in $listDeadProcess
 111   do
 112     echo "$pId survived, I will kill if it's a java process. 'ps' says:"
 113     ps -fj --pid $pId
 114     name=`ps -o comm= --pid $pId`
 115     if [ "$name" == "java" ]
 116     then
 117       echo "$pId, java sub process of $id, is still running, killing it with a standard kill"
 118       echo "Stack for $pId before kill:"
 119       jstack -F -l $pId
 120       kill $pId
 121       echo "kill sent, waiting for 30 seconds"
 122       sleep 30
 123       son=`ps -o pid= --pid $pId | wc -l`
 124       if (test $son -gt 0)
 125       then
 126         echo "$pId, java sub process of $id, is still running after a standard kill, using kill -9 now"
 127         echo "Stack for $pId before kill -9:"
 128         jstack -F -l $pId
 129         kill -9 $pId
 130         echo "kill sent, waiting for 2 seconds"
 131         sleep 2
 132         echo "Process $pId killed by kill -9"
 133       else
 134         echo "Process $pId killed by standard kill -15"
 135       fi
 136     else
 137       echo "$pId is not a java process (it's $name), I don't kill it."
 138     fi
 139   done
 140
 141   createListDeadProcess
 142   if (test ${#listDeadProcess} -gt 0)
 143   then
 144     echo "There are still $sonProcess for process $id left."
 145   else
 146     echo "Process $id clean, no son process left"
 147   fi
 148 }
 149
 150 #count the number of ',' in a string
 151 # used to calculate the number of class
 152 #write $count
 153 function countClasses {
 154   cars=`echo $1 | sed 's/[^,]//g' | wc -c `
 155   count=$((cars - 1))
 156 }
 157
 158
 159 ######################################### script
 160 echo "Starting Script. Possible parameters are: runAllTests, replayFailed, nonParallelMaven"
 161 echo "Other parameters are sent to maven"
 162
 163 #We will use this value at the end to calculate the execution time
 164 startTime=`date +%s`
 165
 166 #look in the arguments if we override default values
 167 for arg in "$@"
 168 do
 169   if [ $arg == "runAllTests" ]
 170   then
 171     runAllTests=1
 172   else
 173     if [ $arg == "replayFailed" ]
 174     then
 175       replayFailed=1
 176     else
 177       if [ $arg == "nonParallelMaven" ]
 178       then
 179         parallelMaven=0
 180       else
 181          args=$args" $arg"
 182       fi
 183     fi
 184   fi
 185 done
 186
 187
 188
 189 testsList=$(find $rootTestClassDirectory -name "Test*.java")
 190
 191
 192 #for all java test files, let see if they contain the pattern
 193 # to recognize the category
 194 for testFile in $testsList
 195 do
 196   lenPath=$((${#rootTestClassDirectory}))
 197   len=$((${#testFile} - $lenPath - 5))  # len(".java") == 5
 198
 199   shortTestFile=${testFile:lenPath:$len}
 200   testName=$(echo $shortTestFile | sed 's/\//\./g')
 201
 202   #The ',' is used in the grep pattern as we don't want to catch
 203   # partial name
 204   isFlaky=$((`echo $flakyTests | grep "$testName," | wc -l`))
 205
 206   if (test $isFlaky -eq 0)
 207   then
 208     isSmall=0
 209     isMedium=0
 210     isLarge=0
 211
 212     # determine the category of the test by greping into the source code
 213     isMedium=`grep "@Category" $testFile | grep "MediumTests.class" | wc -l`
 214     if (test $isMedium -eq 0)
 215     then
 216       isLarge=`grep "@Category" $testFile | grep "LargeTests.class" | wc -l`
 217       if (test $isLarge -eq 0)
 218       then
 219         isSmall=`grep "@Category" $testFile | grep "SmallTests.class" | wc -l`
 220         if (test $isSmall -eq 0)
 221         then
 222           echo "$testName is not categorized, so it won't be tested"
 223         else
 224           #sanity check on small tests
 225           isStrange=`grep "\.startMini" $testFile | wc -l`
 226           if (test $isStrange -gt 0)
 227           then
 228             echo "$testFile is categorized as 'small' but contains a .startMini string. Keep it as small anyway, but it's strange."
 229           fi
 230         fi
 231       fi
 232     fi
 233
 234     #put the test in the right list
 235     if (test $isSmall -gt 0)
 236     then
 237       smallList="$smallList,$testName"
 238     fi
 239     if (test $isMedium -gt 0)
 240     then
 241       mediumList="$mediumList,$testName"
 242     fi
 243     if (test $isLarge -gt 0)
 244     then
 245       largeList="$largeList,$testName"
 246     fi
 247
 248   fi
 249 done
 250
 251 #remove the ',' at the beginning
 252 smallList=${smallList:1:${#smallList}}
 253 mediumList=${mediumList:1:${#mediumList}}
 254 largeList=${largeList:1:${#largeList}}
 255
 256 countClasses $smallList
 257 echo "There are $count small tests"
 258
 259 countClasses $mediumList
 260 echo "There are $count medium tests"
 261
 262 countClasses $largeList
 263 echo "There are $count large tests"
 264
 265
 266
 267
 268 #do we launch only dev or all tests?
 269 if (test $runAllTests -eq 1)
 270 then
 271   echo "Running all tests, small, medium and large"
 272   longList="$mediumList,$largeList"
 273 else
 274   echo "Running developper tests only, small and medium categories"
 275   longList=$mediumList
 276 fi
 277
 278 #medium and large test can be run in //, so we're
 279 #going to create two lists
 280 nextList=1
 281 for testClass in `echo $longList | sed 's/,/ /g'`
 282 do
 283   if (test $nextList -eq 1)
 284   then
 285     nextList=2
 286     runList1=$runList1,$testClass
 287   else
 288     nextList=1
 289     runList2=$runList2,$testClass
 290   fi
 291 done
 292
 293 #remove the ',' at the beginning
 294 runList1=${runList1:1:${#runList1}}
 295 runList2=${runList2:1:${#runList2}}
 296
 297 #now we can run the tests, at last!
 298
 299 echo "Running small tests with one maven instance, in parallel"
 300 #echo Small tests are $smallList
 301 $mvnCommand -P singleJVMTests test -Dtest=$smallList  $args
 302 cleanProcess
 303
 304 exeTime=$(((`date +%s` - $startTime)/60))
 305 echo "Small tests executed after $exeTime minutes"
 306
 307 if (test $parallelMaven -gt 0)
 308 then
 309   echo "Running tests with two maven instances in parallel"
 310   $mvnCommand -P localTests test -Dtest=$runList1  $args &
 311
 312   #give some time  to the fist process if there is anything to compile
 313   sleep 30
 314   $mvnCommand -P localTests test -Dtest=$runList2  $args
 315
 316   #wait for forked process to finish
 317   wait
 318
 319   cleanProcess
 320
 321   exeTime=$(((`date +%s` - $startTime)/60))
 322   echo "Medium and large (if selected) tests executed after $exeTime minutes"
 323
 324   #now the flaky tests, alone, if the list is not empty
 325   # we test on size greater then 5 to remove any "," effect
 326   if (test $runAllTests -eq 1 && test ${#flakyTests} -gt 5)
 327   then
 328     echo "Running flaky tests"
 329     $mvnCommand -P localTests test -Dtest=$flakyTests $args
 330     cleanProcess
 331     exeTime=$(((`date +%s` - $startTime)/60))
 332     echo "Flaky tests executed after $exeTime minutes"
 333   fi
 334 else
 335   echo "Running tests with a single maven instance, no parallelization"
 336   $mvnCommand -P localTests test -Dtest=$runList1,$runList2,$flakyTests $args
 337   cleanProcess
 338   exeTime=$(((`date +%s` - $startTime)/60))
 339   echo "Single maven instance tests executed after $exeTime minutes"
 340 fi
 341
 342 #let's analyze the results
 343 fullRunList="$smallList,$longList"
 344
 345 if (test $runAllTests -eq 1)
 346 then
 347   fullRunList="$fullRunList,$flakyTests"
 348 fi
 349
 350 #single timestamp to ensure files uniquess.
 351 timestamp=`date +%s`
 352
 353 #some counters, initialized because they may not be touched
 354 # in the loop
 355 errorCounter=0
 356 sucessCounter=0
 357 notFinishedCounter=0
 358
 359 for testClass in `echo $fullRunList | sed 's/,/ /g'`
 360 do
 361   reportFile=$surefireReportDirectory/$testClass.txt
 362   outputReportFile=$surefireReportDirectory/$testClass-output.txt
 363
 364   if [ -s $reportFile ];
 365   then
 366     isError=`grep FAILURE $reportFile | wc -l`
 367     if (test $isError -gt 0)
 368     then
 369       errorList="$errorList,$testClass"
 370       errorCounter=$(($errorCounter + 1))
 371
 372       #let's copy the files if we want to use it later
 373       cp $reportFile "$surefireReportDirectory/fail_$timestamp.$testClass.txt"
 374       if [ -s $reportFile ];
 375       then
 376         cp $outputReportFile "$surefireReportDirectory/fail_$timestamp.$testClass"-output.txt""
 377       fi
 378     else
 379
 380       sucessCounter=$(($sucessCounter +1))
 381     fi
 382   else
 383      #report file does not exist or is empty => the test didn't finish
 384      notFinishedCounter=$(($notFinishedCounter + 1))
 385      notFinishedList="$notFinishedList,$testClass"
 386   fi
 387 done
 388
 389 #list of all tests that failed
 390 replayList="$notFinishedList""$errorList"
 391
 392 #remove the ',' at the beginning
 393 notFinishedList=${notFinishedList:1:${#notFinishedList}}
 394 errorList=${errorList:1:${#errorList}}
 395 replayList=${replayList:1:${#replayList}}
 396
 397 #make it simpler to read by removing the org.* stuff from the name
 398 notFinishedPresList=`echo $notFinishedList | sed 's/org.apache.hadoop.hbase.//g' | sed 's/,/, /g'`
 399 errorPresList=`echo $errorList | sed 's/org.apache.hadoop.hbase.//g' | sed 's/,/, /g'`
 400
 401
 402 #calculate the execution time
 403 curTime=`date +%s`
 404 exeTime=$((($curTime - $startTime)/60))
 405
 406 echo "##########################"
 407 echo "$sucessCounter tests executed successfully"
 408 echo "$errorCounter tests are in error"
 409 echo "$notFinishedCounter tests didn't finish"
 410 echo
 411 echo "Tests in error are: $errorPresList"
 412 echo "Tests that didn't finish are: $notFinishedPresList"
 413 echo
 414 echo "Execution time in minutes: $exeTime"
 415 echo "##########################"
 416
 417
 418 if (test ${#replayList} -gt 0)
 419 then
 420   if (test $replayFailed -gt 0)
 421   then
 422     echo "Replaying all tests that failed"
 423     $mvnCommand -P localTests test -Dtest=$replayList  $args
 424     echo "Replaying done"
 425   fi
 426 fi
 427
 428 exit