HBASE-26718 HFileArchiver can remove referenced StoreFiles from the archive (#4274)
[hbase.git] / dev-support / zombie-detector.sh
blobdf4c197ce4dfb85b048f2ed9abc7697f17148d59
1 #!/usr/bin/env bash
2 # Licensed to the Apache Software Foundation (ASF) under one
3 # or more contributor license agreements. See the NOTICE file
4 # distributed with this work for additional information
5 # regarding copyright ownership. The ASF licenses this file
6 # to you under the Apache License, Version 2.0 (the
7 # "License"); you may not use this file except in compliance
8 # with the License. You may obtain a copy of the License at
10 # http://www.apache.org/licenses/LICENSE-2.0
12 # Unless required by applicable law or agreed to in writing,
13 # software distributed under the License is distributed on an
14 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 # KIND, either express or implied. See the License for the
16 # specific language governing permissions and limitations
17 # under the License.
19 # Looks for any running zombies left over from old build runs.
20 # Will report and try to take a stack trace of stale processes so we can
21 # figure out how they are hung. Echoes state as the script runs
22 # on STDERR but prints final output on STDOUT formatted so it
23 # will fold into the test result formatting done by test-patch.sh.
24 # This script is called from test-patch.sh but also after tests
25 # have run up on builds.apache.org.
27 # TODO: format output to suit context -- test-patch, jenkins or dev env
29 #set -x
30 # printenv
### Setup some variables.
# Directory holding this script. "$0" is quoted (and guarded with --) so a
# checkout path containing spaces or a leading dash does not break dirname.
bindir=$(dirname -- "$0")

# This key is set by our surefire configuration up in the main pom.xml
# This key needs to match the key we set up there.
HBASE_BUILD_ID_KEY="hbase.build.id="

# Becomes "true" when the --jenkins option is given.
JENKINS=

# External commands. Each keeps a value already present in the environment
# and otherwise falls back to the plain command name; the matching
# --<name>-cmd option can override it later.
PS=${PS:-ps}
AWK=${AWK:-awk}
WGET=${WGET:-wget}
GREP=${GREP:-grep}
# Note: the environment override for the jira client is read from $JIRA.
JIRACLI=${JIRA:-jira}
46 ###############################################################################
# Print the command-line help for this script on STDOUT.
# Arguments: none. $0 (the script path) is interpolated into the first line.
# Returns:   exit status of cat (0 in practice).
printUsage() {
  # Unquoted delimiter on purpose: $0 must expand inside the help text.
  cat <<EOF
Usage: $0 [options] BUILD_ID

Where:
 BUILD_ID is build id to look for in process listing

Options:
--ps-cmd=<cmd> The 'ps' command to use (default 'ps')
--awk-cmd=<cmd> The 'awk' command to use (default 'awk')
--grep-cmd=<cmd> The 'grep' command to use (default 'grep')

Jenkins-only options:
--jenkins Run by Jenkins (runs tests and posts results to JIRA)
--wget-cmd=<cmd> The 'wget' command to use (default 'wget')
--jira-cmd=<cmd> The 'jira' command to use (default 'jira')
EOF
}
64 ###############################################################################
# Parse the script's command-line arguments.
# Globals written: JENKINS, PS, AWK, WGET, GREP, JIRACLI, BUILD_ID
# Arguments:       the arguments given to the script
# Exits:           with status 1, after printing the usage text, when no
#                  BUILD_ID ends up being set.
parseArgs() {
  local arg
  # Iterate over "$@" so each argument stays one word: values containing
  # spaces or glob characters are neither split nor expanded.
  for arg in "$@"; do
    case "${arg}" in
      --jenkins)
        JENKINS=true
        ;;
      --ps-cmd=*)
        PS=${arg#*=}
        ;;
      --awk-cmd=*)
        AWK=${arg#*=}
        ;;
      --wget-cmd=*)
        WGET=${arg#*=}
        ;;
      --grep-cmd=*)
        GREP=${arg#*=}
        ;;
      --jira-cmd=*)
        JIRACLI=${arg#*=}
        ;;
      *)
        # Anything that is not a recognised option is taken as the build id;
        # when several are given the last one wins.
        BUILD_ID=${arg}
        ;;
    esac
  done
  if [[ -z "${BUILD_ID}" ]]; then
    printUsage
    exit 1
  fi
}
### Return list of the processes found with passed build id.
# Lists JVMs with `jps -v` and keeps the lines that mention surefirebooter
# and contain the build tag.
# Globals read: HBASE_BUILD_TAG, GREP (falls back to grep when unset/empty)
# Outputs:      matching `jps -v` lines on STDOUT
# Returns:      status of the last grep (non-zero when nothing matched)
find_processes () {
  # -F: the tag is a literal string; as a regex the dots in
  # "hbase.build.id=" would match any character.
  # GREP is left unquoted so a configured command may carry its own options.
  jps -v \
    | ${GREP:-grep} -F -e surefirebooter \
    | ${GREP:-grep} -F -e "${HBASE_BUILD_TAG}"
}
### Look for zombies
# Finds candidate test JVMs with find_processes, waits 30s so slow-to-stop
# processes can finish, then re-checks each candidate pid and collects a short
# stack excerpt for every one that is still running.
# Globals read:    JIRA_COMMENT, HBASE_BUILD_TAG, PS, AWK, GREP
# Globals written: ZOMBIES, ZOMBIE_TESTS_COUNT, PIDS, PS_OUTPUT, PS_STACK,
#                  ZB_STACK
# Outputs:         progress on STDERR; the JIRA-formatted verdict, prefixed
#                  with $JIRA_COMMENT, on STDOUT
# Returns:         0 when no zombie remains; calls `exit 1` when at least one
#                  candidate is still running after the wait.
zombies () {
  local wait_secs=30
  local pid

  ZOMBIES=$(find_processes)
  if [[ -z "${ZOMBIES}" ]]; then
    ZOMBIE_TESTS_COUNT=0
  else
    # wc may pad its count with blanks; strip them.
    ZOMBIE_TESTS_COUNT=$(printf '%s\n' "${ZOMBIES}" | wc -l | tr -d '[:space:]')
  fi

  if [[ "${ZOMBIE_TESTS_COUNT}" == 0 ]]; then
    echo "$(date) We're ok: there is no zombie test" >&2
    printf '%s\n\n%s\n' "${JIRA_COMMENT}" \
      "{color:green}+1 zombies{color}. No zombie tests found running at the end of the build."
    return 0
  fi

  echo "$(date) Found ${ZOMBIE_TESTS_COUNT} suspicious java process(es) listed below; waiting ${wait_secs}s to see if just slow to stop" >&2
  # Quoted so the listing keeps one process per line.
  printf '%s\n' "${ZOMBIES}" >&2
  sleep "${wait_secs}"

  # AWK/PS/GREP are left unquoted so a configured command may carry options.
  PIDS=$(printf '%s\n' "${ZOMBIES}" | ${AWK:-awk} '{print $1}')
  ZOMBIE_TESTS_COUNT=0
  # Start from empty so a value inherited from the environment cannot leak
  # into the report.
  ZB_STACK=
  for pid in ${PIDS}; do
    # Test our zombie still running (and that it still an hbase build item).
    # Ask ps for the full argument list without a header: the default
    # `ps -p` output may show only the short command name, which can never
    # contain the build tag. -ww asks for untruncated output.
    PS_OUTPUT=$(${PS:-ps} -ww -o pid= -o args= -p "${pid}" \
      | ${GREP:-grep} -F -e "${HBASE_BUILD_TAG}")
    if [[ -n "${PS_OUTPUT}" ]]; then
      echo "$(date) Zombie: ${PS_OUTPUT}" >&2
      ZOMBIE_TESTS_COUNT=$((ZOMBIE_TESTS_COUNT + 1))
      # Keep at most three stack lines that mention both ".Test" and ".java".
      PS_STACK=$(jstack "${pid}" \
        | ${GREP:-grep} -e "\.Test" \
        | ${GREP:-grep} -e "\.java" \
        | head -n 3)
      printf '%s\n' "${PS_STACK}" >&2
      # Real newline rather than a literal "\n": the report is printed with
      # printf '%s', so backslashes in the data are never interpreted.
      ZB_STACK="${ZB_STACK}"$'\n'"PID=${pid} ${PS_STACK}"
    fi
  done

  if [[ "${ZOMBIE_TESTS_COUNT}" != 0 ]]; then
    echo "$(date) There are ${ZOMBIE_TESTS_COUNT} possible zombie test(s)." >&2
    # If JIRA_COMMENT in environment, append our findings to it.
    # A failed check is a -1 vote, and the colour span closes with {color}.
    printf '%s\n\n%s\n%s\n' "${JIRA_COMMENT}" \
      "{color:red}-1 zombies{color}. There are ${ZOMBIE_TESTS_COUNT} possible zombie test(s)" \
      "${ZB_STACK}"
    # Exit with exit code of 1.
    exit 1
  fi

  echo "$(date) We're ok: there was a zombie candidate but it went away" >&2
  printf '%s\n\n%s\n' "${JIRA_COMMENT}" \
    "{color:green}+1 zombies{color}. No zombie tests found running at the end of the build (There were candidates but they seem to have gone away)."
}
### Check if arguments to the script have been specified properly or not
# "$@" is quoted so every argument reaches parseArgs as given, without being
# word-split or glob-expanded on the way.
parseArgs "$@"
# Tag searched for in process listings: the surefire key followed by the
# build id taken from the command line.
HBASE_BUILD_TAG="${HBASE_BUILD_ID_KEY}${BUILD_ID}"
zombies
RESULT=$?
# NOTE(review): zombies calls `exit 1` itself when it finds zombies, so
# RESULT looks like it is always 0 here and the `exit 100` below appears
# unreachable -- confirm which exit code Jenkins is meant to see.
if [[ "${JENKINS}" == "true" ]]; then
  if [[ "${RESULT}" != 0 ]]; then
    exit 100
  fi
fi
RESULT=$?