[RSGroup] Forward-port HBASE-22658 to master branch and branch-2.x (#1326)
[hbase.git] / dev-support / hbasetests.sh
blob50c3a51bcdd0e86ea87e944f9f84a45713e1e50a
1 #!/usr/bin/env bash
2 ##
3 # Licensed to the Apache Software Foundation (ASF) under one
4 # or more contributor license agreements. See the NOTICE file
5 # distributed with this work for additional information
6 # regarding copyright ownership. The ASF licenses this file
7 # to you under the Apache License, Version 2.0 (the
8 # "License"); you may not use this file except in compliance
9 # with the License. You may obtain a copy of the License at
11 # http://www.apache.org/licenses/LICENSE-2.0
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
# This script:
# - analyses the content of the .java test files to split them between
#   small/medium/large
# - launches the small tests in a single maven, with surefire
#   parallelisation activated
# - launches the medium & large tests in two maven instances, in parallel
# - runs the flaky tests at the end, not parallelized
# - presents a small report of the global results
# - copies the failed test reports with prefix 'fail_' and a timestamp
#   to protect them from a later deletion by maven
# - if configured to do so, relaunches the tests in error
#
# Caveats:
# - multiple maven instances are launched, hence there can be recompilation
#   between the tests if a file is modified. For non flaky tests and
#   parallelization, the frame is the time to execute the small tests,
#   so it's around 4 minutes.
# - Note that surefire is buggy, and the results presented while
#   running may be wrong. For example, it can say that a class's tests
#   have 5 errors. When you look at the file it wrote, it says that the
#   2 tests are ok, and in the class there are actually two test
#   methods, not five. If you generate the report at the end with
#   surefire-report it's fine however.
######################################### parameters

#mvn test -Dtest=org.apache.hadoop.hbase.regionserver.TestScanWithBloomError $*

#exit

# Set to 0 to run only the developer tests (small & medium categories).
# Set to 1 to run the large tests as well.
runAllTests=0

# Set to 1 to replay the failed tests. Previous reports are kept in
# fail_ files.
replayFailed=0

# Set to 0 to run all medium & large tests in a single maven operation
# instead of two.
parallelMaven=1

# Hardcoded list of tests that often fail. We don't want to add any
# complexity around them, so they are not run in parallel but after
# the others.
# The ',' at the end is mandatory.
flakyTests=
#org.apache.hadoop.hbase.mapreduce.TestTableInputFormatScan,org.apache.hadoop.hbase.catalog.TestMetaTableAccessorNoCluster,org.apache.hadoop.hbase.catalog.TestMetaTableAccessor,org.apache.hadoop.hbase.mapreduce.TestHFileOutputFormat,org.apache.hadoop.hbase.mapred.TestTableMapReduce,org.apache.hadoop.hbase.coprocessor.TestMasterCoprocessorExceptionWithAbort,org.apache.hadoop.hbase.coprocessor.TestMasterCoprocessorExceptionWithRemove,org.apache.hadoop.hbase.client.TestAdmin,org.apache.hadoop.hbase.master.TestMasterFailover,org.apache.hadoop.hbase.regionserver.wal.TestLogRolling,org.apache.hadoop.hbase.master.TestDistributedLogSplitting,org.apache.hadoop.hbase.master.TestMasterRestartAfterDisablingTable,org.apache.hadoop.hbase.TestGlobalMemStoreSize,

######################################### Internal parameters
# Directories used for the source code & the surefire reports.
# They should not need to be modified.
# The final / is mandatory.
rootTestClassDirectory="./src/test/java/"
surefireReportDirectory="./target/surefire-reports/"

# Command used to launch maven. Switch to the commented-out 'echo' variant
# to debug the script without launching the tests.
mvnCommand="mvn "
#mvnCommand="echo $mvnCommand"
80 ######################################### Functions
# Get the list of the processes considered as dead, i.e. processes that
# have a ppid of 1 and are in the same process group as this script.
# We do this because surefire can leave some dead processes behind, so
# the caller can jstack them and kill them.
#
# Globals written:
#   id              - pid of this script ($$)
#   sonProcess      - pids of ALL processes whose ppid is 1
#   listDeadProcess - space separated pids (each followed by a space) of the
#                     processes from sonProcess whose pgid equals $id
function createListDeadProcess {
  local pId pgid

  id=$$
  listDeadProcess=""

  # list of the processes with a ppid of 1
  sonProcess=$(ps -o pid= --ppid 1)

  # then keep only the ones whose pgid is the pid of the script.
  # $sonProcess is deliberately unquoted: it is a whitespace separated
  # list of pids.
  for pId in $sonProcess; do
    # ps pads its output with spaces, remove them before comparing
    pgid=$(ps -o pgid= --pid "$pId" | sed 's/ //g')
    if [[ "$pgid" == "$id" ]]; then
      listDeadProcess="$pId $listDeadProcess"
    fi
  done
}
# Kill the java sub processes, if any, with a kill and then a kill -9.
# When maven/surefire fails, it leaves some processes with a ppid==1.
# We find them with the pgid (see createListDeadProcess), print their
# stack with jstack and kill them. Processes that are not named "java"
# are reported but left alone.
#
# Globals written: id (and, through createListDeadProcess, sonProcess and
# listDeadProcess).
function cleanProcess {
  local pId name

  id=$$

  createListDeadProcess
  # $listDeadProcess is deliberately unquoted: space separated list of pids
  for pId in $listDeadProcess; do
    echo "$pId survived, I will kill if it's a java process. 'ps' says:"
    ps -fj --pid "$pId"
    name=$(ps -o comm= --pid "$pId")
    if [[ "$name" == "java" ]]; then
      echo "$pId, java sub process of $id, is still running, killing it with a standard kill"
      echo "Stack for $pId before kill:"
      jstack -F -l "$pId"
      kill "$pId"
      echo "kill sent, waiting for 30 seconds"
      sleep 30
      # ps prints nothing when the pid does not exist anymore
      if [[ -n "$(ps -o pid= --pid "$pId")" ]]; then
        echo "$pId, java sub process of $id, is still running after a standard kill, using kill -9 now"
        echo "Stack for $pId before kill -9:"
        jstack -F -l "$pId"
        kill -9 "$pId"
        echo "kill sent, waiting for 2 seconds"
        sleep 2
        echo "Process $pId killed by kill -9"
      else
        echo "Process $pId killed by standard kill -15"
      fi
    else
      echo "$pId is not a java process (it's $name), I don't kill it."
    fi
  done

  # check again what is left after the clean up
  createListDeadProcess
  if [[ -n "$listDeadProcess" ]]; then
    # Report the survivors themselves (listDeadProcess), not sonProcess,
    # which holds every process whose parent is pid 1.
    echo "There are still ${listDeadProcess% } for process $id left."
  else
    echo "Process $id clean, no son process left"
  fi
}
# Count the number of classes in a ',' separated list.
# Empty entries are ignored, so a leading or trailing ',' does not change
# the result: "a,b,c" and "a,b,c," both give 3, an empty list gives 0.
#
# Arguments: $1 - the ',' separated list (may be empty or missing)
# Globals written: count - the number of classes found
function countClasses {
  local list=${1-}
  local -a items=()
  local item

  count=0
  IFS=',' read -r -a items <<< "$list"
  for item in "${items[@]}"; do
    if [[ -n "$item" ]]; then
      count=$((count + 1))
    fi
  done
}
######################################### script
echo "Starting Script. Possible parameters are: runAllTests, replayFailed, nonParallelMaven"
echo "Other parameters are sent to maven"

# We will use this value at the end to calculate the execution time
startTime=$(date +%s)

# Look in the arguments to see if we override the default values.
# Anything that is not one of the three known keywords is appended to
# $args (a space separated string) and handed over to maven later on.
for arg in "$@"; do
  case "$arg" in
    runAllTests)
      runAllTests=1
      ;;
    replayFailed)
      replayFailed=1
      ;;
    nonParallelMaven)
      parallelMaven=0
      ;;
    *)
      args="${args-} $arg"
      ;;
  esac
done
# For all the java test files, let's see if they contain the pattern
# used to recognize the category, and sort them into smallList,
# mediumList and largeList (',' separated lists of class names).
smallList=""
mediumList=""
largeList=""

lenPath=${#rootTestClassDirectory}

# NUL delimited read so that unusual characters in paths cannot split a name
while IFS= read -r -d '' testFile; do
  # remove the root directory at the beginning and ".java" at the end,
  # then turn the path into a fully qualified class name
  shortTestFile=${testFile:lenPath}
  shortTestFile=${shortTestFile%.java}
  testName=${shortTestFile//\//.}

  # Flaky tests are handled separately. The ',' on both sides makes this a
  # match on a complete entry of the list: we don't want to catch partial
  # names, and the '.' of the class name must not act as a wildcard.
  if [[ ",$flakyTests" == *",$testName,"* ]]; then
    continue
  fi

  # determine the category of the test by grepping into the source code;
  # medium wins over large, which wins over small
  categoryLines=$(grep "@Category" "$testFile")
  case "$categoryLines" in
    *"MediumTests.class"*)
      mediumList="$mediumList,$testName"
      ;;
    *"LargeTests.class"*)
      largeList="$largeList,$testName"
      ;;
    *"SmallTests.class"*)
      smallList="$smallList,$testName"
      # sanity check on small tests
      if grep -q "\.startMini" "$testFile"; then
        echo "$testFile is categorized as 'small' but contains a .startMini string. Keep it as small anyway, but it's strange."
      fi
      ;;
    *)
      echo "$testName is not categorized, so it won't be tested"
      ;;
  esac
done < <(find "$rootTestClassDirectory" -name "Test*.java" -print0)

# remove the ',' at the beginning
smallList=${smallList#,}
mediumList=${mediumList#,}
largeList=${largeList#,}

countClasses "$smallList"
echo "There are $count small tests"

countClasses "$mediumList"
echo "There are $count medium tests"

countClasses "$largeList"
echo "There are $count large tests"
# Do we launch only the dev tests or all the tests?
if [[ "${runAllTests:-0}" -eq 1 ]]; then
  echo "Running all tests, small, medium and large"
  longList="$mediumList,$largeList"
else
  echo "Running developper tests only, small and medium categories"
  longList=$mediumList
fi

# Medium and large tests can be run in parallel, so we create two lists
# and deal the classes of longList alternately into runList1 and runList2.
runList1=""
runList2=""
nextList=1
IFS=',' read -r -a longClasses <<< "$longList"
for testClass in "${longClasses[@]}"; do
  # an empty medium or large list leaves an empty entry in longList
  [[ -n "$testClass" ]] || continue
  if [[ "$nextList" -eq 1 ]]; then
    nextList=2
    runList1="$runList1,$testClass"
  else
    nextList=1
    runList2="$runList2,$testClass"
  fi
done

# remove the ',' at the beginning
runList1=${runList1#,}
runList2=${runList2#,}
# Now we can run the tests, at last!
#
# $mvnCommand and $args are deliberately left unquoted everywhere below:
# both are space separated strings that must be split into words.
# A maven run is skipped when its list of classes is empty, so that maven is
# never launched with an empty -Dtest= value.

echo "Running small tests with one maven instance, in parallel"
#echo Small tests are $smallList
if [[ -n "$smallList" ]]; then
  $mvnCommand -P singleJVMTests test -Dtest="$smallList" $args
  cleanProcess
else
  echo "No small tests to run, skipping"
fi

exeTime=$(( ($(date +%s) - startTime) / 60 ))
echo "Small tests executed after $exeTime minutes"

if [[ "${parallelMaven:-0}" -gt 0 ]]; then
  echo "Running tests with two maven instances in parallel"
  if [[ -n "$runList1" ]]; then
    $mvnCommand -P localTests test -Dtest="$runList1" $args &

    # give some time to the first process if there is anything to compile
    sleep 30
  fi
  if [[ -n "$runList2" ]]; then
    $mvnCommand -P localTests test -Dtest="$runList2" $args
  fi

  # wait for the forked process to finish
  wait

  cleanProcess

  exeTime=$(( ($(date +%s) - startTime) / 60 ))
  echo "Medium and large (if selected) tests executed after $exeTime minutes"

  # now the flaky tests, alone, if the list is not empty
  # we test on a size greater than 5 to remove any "," effect
  if [[ "${runAllTests:-0}" -eq 1 && ${#flakyTests} -gt 5 ]]; then
    echo "Running flaky tests"
    $mvnCommand -P localTests test -Dtest="$flakyTests" $args
    cleanProcess
    exeTime=$(( ($(date +%s) - startTime) / 60 ))
    echo "Flaky tests executed after $exeTime minutes"
  fi
else
  echo "Running tests with a single maven instance, no parallelization"

  # Build the list without empty entries. The flaky tests are added under
  # the same condition as in the parallel case: only when all the tests
  # are requested.
  singleList=$runList1
  if [[ -n "$runList2" ]]; then
    singleList="${singleList:+$singleList,}$runList2"
  fi
  if [[ "${runAllTests:-0}" -eq 1 && ${#flakyTests} -gt 5 ]]; then
    singleList="${singleList:+$singleList,}$flakyTests"
  fi

  if [[ -n "$singleList" ]]; then
    $mvnCommand -P localTests test -Dtest="$singleList" $args
    cleanProcess
  else
    echo "No medium, large or flaky tests to run, skipping"
  fi
  exeTime=$(( ($(date +%s) - startTime) / 60 ))
  echo "Single maven instance tests executed after $exeTime minutes"
fi
# Let's analyze the results: for every class we tried to run, look at the
# surefire report to decide if it succeeded, failed or did not finish.
fullRunList="$smallList,$longList"

if [[ "${runAllTests:-0}" -eq 1 ]]; then
  fullRunList="$fullRunList,$flakyTests"
fi

# single timestamp to ensure the uniqueness of the file names
timestamp=$(date +%s)

# some counters and lists, initialized because they may not be touched
# in the loop
errorCounter=0
sucessCounter=0
notFinishedCounter=0
errorList=""
notFinishedList=""

IFS=',' read -r -a fullRunClasses <<< "$fullRunList"
for testClass in "${fullRunClasses[@]}"; do
  # empty lists and the trailing ',' of flakyTests leave empty entries
  [[ -n "$testClass" ]] || continue

  reportFile=$surefireReportDirectory/$testClass.txt
  outputReportFile=$surefireReportDirectory/$testClass-output.txt

  if [[ -s "$reportFile" ]]; then
    if grep -q FAILURE "$reportFile"; then
      errorList="$errorList,$testClass"
      errorCounter=$((errorCounter + 1))

      # let's copy the files if we want to use them later
      cp "$reportFile" "$surefireReportDirectory/fail_$timestamp.$testClass.txt"
      # the output file is optional: only copy it when it exists and is
      # not empty
      if [[ -s "$outputReportFile" ]]; then
        cp "$outputReportFile" "$surefireReportDirectory/fail_$timestamp.$testClass-output.txt"
      fi
    else
      sucessCounter=$((sucessCounter + 1))
    fi
  else
    # report file does not exist or is empty => the test didn't finish
    notFinishedCounter=$((notFinishedCounter + 1))
    notFinishedList="$notFinishedList,$testClass"
  fi
done

# list of all the tests that failed or did not finish
replayList="$notFinishedList$errorList"

# remove the ',' at the beginning
notFinishedList=${notFinishedList#,}
errorList=${errorList#,}
replayList=${replayList#,}

# make it simpler to read by removing the org.* stuff from the names
notFinishedPresList=${notFinishedList//org.apache.hadoop.hbase./}
notFinishedPresList=${notFinishedPresList//,/, }
errorPresList=${errorList//org.apache.hadoop.hbase./}
errorPresList=${errorPresList//,/, }

# calculate the execution time
curTime=$(date +%s)
exeTime=$(( (curTime - startTime) / 60 ))

echo "##########################"
echo "$sucessCounter tests executed successfully"
echo "$errorCounter tests are in error"
echo "$notFinishedCounter tests didn't finish"
echo
echo "Tests in error are: $errorPresList"
echo "Tests that didn't finish are: $notFinishedPresList"
echo
echo "Execution time in minutes: $exeTime"
echo "##########################"
# If asked for (replayFailed), run again in a single maven instance all the
# tests that failed or did not finish.
# $mvnCommand and $args are deliberately unquoted: both are space separated
# strings that must be split into words.
if [[ -n "$replayList" && "${replayFailed:-0}" -gt 0 ]]; then
  echo "Replaying all tests that failed"
  $mvnCommand -P localTests test -Dtest="$replayList" $args
  echo "Replaying done"
fi

exit