HBASE-15882 Upgrade to yetus precommit 0.3.0
[hbase.git] / bin / rolling-restart.sh
blob9ac4d64b823821c0e985199ac985b0268a286f1e
1 #!/usr/bin/env bash
3 #/**
4 # * Licensed to the Apache Software Foundation (ASF) under one
5 # * or more contributor license agreements. See the NOTICE file
6 # * distributed with this work for additional information
7 # * regarding copyright ownership. The ASF licenses this file
8 # * to you under the Apache License, Version 2.0 (the
9 # * "License"); you may not use this file except in compliance
10 # * with the License. You may obtain a copy of the License at
11 # *
12 # * http://www.apache.org/licenses/LICENSE-2.0
13 # *
14 # * Unless required by applicable law or agreed to in writing, software
15 # * distributed under the License is distributed on an "AS IS" BASIS,
16 # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 # * See the License for the specific language governing permissions and
18 # * limitations under the License.
19 # */
21 # Perform a rolling restart of the HBase master(s) and/or regionservers.
23 # Environment Variables
25 # HBASE_REGIONSERVERS File naming remote hosts.
26 # Default is ${HADOOP_CONF_DIR}/regionservers
27 # HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_HOME}/conf.
28 # HBASE_CONF_DIR Alternate hbase conf dir. Default is ${HBASE_HOME}/conf.
29 # HBASE_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
30 # HBASE_SLAVE_TIMEOUT Seconds to wait for timing out a remote command.
31 # HBASE_SSH_OPTS Options passed to ssh when running remote commands.
33 # Modelled after $HADOOP_HOME/bin/slaves.sh.
35 usage_str="Usage: `basename $0` [--config <hbase-confdir>] [--rs-only] [--master-only]\
36 [--graceful [--maxthreads xx] [--noack] [--movetimeout]]"
38 function usage() {
39 echo "${usage_str}"
42 bin=`dirname "$0"`
43 bin=`cd "$bin">/dev/null; pwd`
45 . "$bin"/hbase-config.sh
47 # abort if sourcing hbase-config.sh above failed
48 errCode=$?
49 if [ $errCode -ne 0 ]
50 then
51 exit $errCode
54 RR_RS=1
55 RR_MASTER=1
56 RR_GRACEFUL=0
57 RR_MAXTHREADS=1
58 RR_NOACK=
59 RR_MOVE_TIMEOUT=2147483647
61 while [ $# -gt 0 ]; do
62 case "$1" in
63 --rs-only|-r)
64 RR_RS=1
65 RR_MASTER=0
66 RR_GRACEFUL=0
67 shift
69 --master-only)
70 RR_RS=0
71 RR_MASTER=1
72 RR_GRACEFUL=0
73 shift
75 --graceful)
76 RR_RS=0
77 RR_MASTER=0
78 RR_GRACEFUL=1
79 shift
81 --maxthreads)
82 shift
83 RR_MAXTHREADS=$1
84 shift
86 --noack)
87 RR_NOACK="--noack"
88 shift
90 --movetimeout)
91 shift
92 RR_MOVE_TIMEOUT=$1
93 shift
95 --help|-h)
96 usage
97 exit 0
100 echo Bad argument: $1
101 usage
102 exit 1
104 esac
105 done
107 # Look up hbase.cluster.distributed in the HBase configuration (via HBaseConfTool).
108 # HBASE-6504 - only take the first line of the output in case verbose gc is on
109 distMode=`HBASE_CONF_DIR=${HBASE_CONF_DIR} $bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed | head -n 1`
110 if [ "$distMode" == 'false' ]; then
111 if [ $RR_RS -ne 1 ] || [ $RR_MASTER -ne 1 ]; then
112 echo Cant do selective rolling restart if not running distributed
113 exit 1
115 "$bin"/hbase-daemon.sh restart master
116 else
117 zparent=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.parent`
118 if [ "$zparent" == "null" ]; then zparent="/hbase"; fi
120 if [ $RR_MASTER -eq 1 ]; then
121 # stop all masters before re-start to avoid races for master znode
122 "$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" stop master
123 "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
124 --hosts "${HBASE_BACKUP_MASTERS}" stop master-backup
126 # make sure the master znode has been deleted before continuing
127 zmaster=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.master`
128 if [ "$zmaster" == "null" ]; then zmaster="master"; fi
129 zmaster=$zparent/$zmaster
130 echo -n "Waiting for Master ZNode ${zmaster} to expire"
131 echo
132 while ! "$bin"/hbase zkcli stat $zmaster 2>&1 | grep "Node does not exist"; do
133 echo -n "."
134 sleep 1
135 done
136 echo #force a newline
138 # all masters are down, now restart
139 "$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master
140 "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
141 --hosts "${HBASE_BACKUP_MASTERS}" start master-backup
143 echo "Wait a minute for master to come up join cluster"
144 sleep 60
146 # On joining the cluster, the master starts by cleaning out regions in transition.
147 # Wait until the master has cleaned out regions in transition before
148 # giving it a bunch of work to do; master is vulnerable during startup
149 zunassigned=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.unassigned`
150 if [ "$zunassigned" == "null" ]; then zunassigned="region-in-transition"; fi
151 zunassigned="$zparent/$zunassigned"
152 # Check whether the region-in-transition znode (${zunassigned}) exists
153 ritZnodeCheck=`$bin/hbase zkcli stat ${zunassigned} 2>&1 | tail -1 \
154 | grep "Node does not exist:" >/dev/null`
155 ret=$?
156 if test 0 -eq ${ret}
157 then
158 echo "Znode ${zunassigned} does not exist"
159 else
160 echo -n "Waiting for ${zunassigned} to empty"
161 while true ; do
162 unassigned=`$bin/hbase zkcli stat ${zunassigned} 2>&1 \
163 | grep -e 'numChildren = '|sed -e 's,numChildren = ,,'`
164 if test 0 -eq ${unassigned}
165 then
166 echo
167 break
168 else
169 echo -n " ${unassigned}"
171 sleep 1
172 done
176 if [ $RR_RS -eq 1 ]; then
177 # unlike the masters, roll all regionservers one-at-a-time
178 export HBASE_SLAVE_PARALLEL=false
179 "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
180 --hosts "${HBASE_REGIONSERVERS}" restart regionserver
183 if [ $RR_GRACEFUL -eq 1 ]; then
184 # gracefully restart all online regionservers
185 masterport=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool hbase.master.port`
186 if [ "$masterport" == "null" ]; then masterport="16000"; fi
187 zkrs=`$bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool zookeeper.znode.rs`
188 if [ "$zkrs" == "null" ]; then zkrs="rs"; fi
189 zkrs="$zparent/$zkrs"
190 online_regionservers=`$bin/hbase zkcli ls $zkrs 2>&1 | tail -1 | sed "s/\[//" | sed "s/\]//"`
191 echo "Disabling load balancer"
192 HBASE_BALANCER_STATE=$(echo 'balance_switch false' | "$bin"/hbase --config "${HBASE_CONF_DIR}" shell -n | tail -1)
193 echo "Previous balancer state was $HBASE_BALANCER_STATE"
195 for rs in $online_regionservers
197 rs_parts=(${rs//,/ })
198 hostname=${rs_parts[0]}
199 port=${rs_parts[1]}
200 if [ "$port" -eq "$masterport" ]; then
201 echo "Skipping regionserver on master machine $hostname:$port"
202 continue
203 else
204 echo "Gracefully restarting: $hostname"
205 "$bin"/graceful_stop.sh --config ${HBASE_CONF_DIR} --restart --reload -nob --maxthreads \
206 ${RR_MAXTHREADS} ${RR_NOACK} --movetimeout ${RR_MOVE_TIMEOUT} $hostname
207 sleep 1
209 done
210 if [ "$HBASE_BALANCER_STATE" != "false" ]; then
211 echo "Restoring balancer state to $HBASE_BALANCER_STATE"
212 echo "balance_switch $HBASE_BALANCER_STATE" | "$bin"/hbase --config "${HBASE_CONF_DIR}" shell &> /dev/null