Preparing development version 2.4.4-SNAPSHOT
[hbase.git] / bin / rolling-restart.sh
blob11c091d6583cce7510fb44d2687965123d18042b
1 #!/usr/bin/env bash
3 #/**
4 # * Licensed to the Apache Software Foundation (ASF) under one
5 # * or more contributor license agreements. See the NOTICE file
6 # * distributed with this work for additional information
7 # * regarding copyright ownership. The ASF licenses this file
8 # * to you under the Apache License, Version 2.0 (the
9 # * "License"); you may not use this file except in compliance
10 # * with the License. You may obtain a copy of the License at
11 # *
12 # * http://www.apache.org/licenses/LICENSE-2.0
13 # *
14 # * Unless required by applicable law or agreed to in writing, software
15 # * distributed under the License is distributed on an "AS IS" BASIS,
16 # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 # * See the License for the specific language governing permissions and
18 # * limitations under the License.
19 # */
# Perform a rolling restart of the HBase masters and/or regionservers.
#
# Environment Variables
#
#   HBASE_REGIONSERVERS    File naming remote hosts.
#     Default is ${HADOOP_CONF_DIR}/regionservers
#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
#   HBASE_CONF_DIR  Alternate hbase conf dir. Default is ${HBASE_HOME}/conf.
#   HBASE_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
#   HBASE_SLAVE_TIMEOUT Seconds to wait for timing out a remote command.
#   HBASE_SSH_OPTS Options passed to ssh when running remote commands.
#
# Modelled after $HADOOP_HOME/bin/slaves.sh.
# Command-line synopsis printed by usage(). Every option that takes a value
# shows a placeholder for it.
usage_str="Usage: $(basename -- "$0") [--config <hbase-confdir>] [--autostart-window-size <window size in hours>] \
[--autostart-window-retry-limit <retry count limit for autostart>] [--autostart] [--rs-only] [--master-only] \
[--graceful] [--maxthreads <max threads>] [--noack] [--movetimeout <timeout>]"
# Print the command-line synopsis stored in the global usage_str to stdout.
# Takes no arguments; the caller decides whether to exit afterwards.
function usage() {
  printf '%s\n' "${usage_str}"
}
# Absolute path of the directory holding this script; the sibling scripts
# (hbase, hbase-daemon.sh, ...) are invoked through "$bin".
bin=$(dirname -- "$0")
# Fail loudly if the directory cannot be entered: with "cd ...; pwd" a failed
# cd would silently yield the caller's working directory instead.
bin=$(cd -- "$bin" >/dev/null && pwd) || {
  echo "Cannot resolve the script directory from $0" >&2
  exit 1
}

# default autostart args value indicating infinite window size and no retry limit
AUTOSTART_WINDOW_SIZE=0
AUTOSTART_WINDOW_RETRY_LIMIT=0
# Load the shared HBase environment from the sibling hbase-config.sh.
# It is sourced without arguments, so it sees this script's own positional
# parameters.
# NOTE(review): presumably this is where --config and the
# --autostart-window-* options advertised in the usage text are consumed;
# confirm against hbase-config.sh.
. "$bin"/hbase-config.sh

# Abort with the same status if the configuration could not be loaded.
errCode=$?
if [ $errCode -ne 0 ]; then
  exit $errCode
fi
# Defaults: restart masters and regionservers, not gracefully, one thread.
RR_RS=1
RR_MASTER=1
RR_GRACEFUL=0
RR_MAXTHREADS=1
# Initialise the optional pass-through flags explicitly so that the graceful
# restart never hands an empty "--movetimeout" value to graceful_stop.sh
# (which would make it swallow the hostname as the timeout).
RR_NOACK=
# INT_MAX, i.e. effectively no timeout.
# NOTE(review): chosen to mirror what graceful_stop.sh is believed to use as
# its own default; confirm against that script.
RR_MOVE_TIMEOUT=2147483647
# Sub-commands handed to hbase-daemon(s).sh; --autostart replaces them.
START_CMD_NON_DIST_MODE=restart
START_CMD_DIST_MODE=start
RESTART_CMD_REGIONSERVER=restart
# Parse the rolling-restart options into the RR_* and *_CMD_* globals.
#
# Arguments: the script's remaining command-line arguments.
# Globals written: RR_RS, RR_MASTER, RR_GRACEFUL, RR_MAXTHREADS, RR_NOACK,
#   RR_MOVE_TIMEOUT, START_CMD_NON_DIST_MODE, START_CMD_DIST_MODE,
#   RESTART_CMD_REGIONSERVER.
# Globals read: AUTOSTART_WINDOW_SIZE, AUTOSTART_WINDOW_RETRY_LIMIT.
# Exits 0 after printing usage for --help/-h; exits 1 on an unknown option
# or on an option that is missing its value.
function parse_rolling_restart_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --rs-only|-r)
        RR_RS=1
        RR_MASTER=0
        RR_GRACEFUL=0
        shift
        ;;
      --autostart)
        # The autostart variants carry their window options in front of the
        # sub-command; the variables are expanded unquoted where they are used.
        START_CMD_NON_DIST_MODE="--autostart-window-size ${AUTOSTART_WINDOW_SIZE} --autostart-window-retry-limit ${AUTOSTART_WINDOW_RETRY_LIMIT} autorestart"
        START_CMD_DIST_MODE="--autostart-window-size ${AUTOSTART_WINDOW_SIZE} --autostart-window-retry-limit ${AUTOSTART_WINDOW_RETRY_LIMIT} autostart"
        RESTART_CMD_REGIONSERVER="--autostart-window-size ${AUTOSTART_WINDOW_SIZE} --autostart-window-retry-limit ${AUTOSTART_WINDOW_RETRY_LIMIT} autorestart"
        shift
        ;;
      --master-only)
        RR_RS=0
        RR_MASTER=1
        RR_GRACEFUL=0
        shift
        ;;
      --graceful)
        RR_RS=0
        RR_MASTER=0
        RR_GRACEFUL=1
        shift
        ;;
      --maxthreads)
        # Reject a trailing "--maxthreads" instead of storing an empty value.
        if [ $# -lt 2 ]; then
          echo "Missing value for $1"
          usage
          exit 1
        fi
        RR_MAXTHREADS=$2
        shift 2
        ;;
      --noack)
        RR_NOACK="--noack"
        shift
        ;;
      --movetimeout)
        # Reject a trailing "--movetimeout" instead of storing an empty value.
        if [ $# -lt 2 ]; then
          echo "Missing value for $1"
          usage
          exit 1
        fi
        RR_MOVE_TIMEOUT=$2
        shift 2
        ;;
      --help|-h)
        usage
        exit 0
        ;;
      *)
        echo "Bad argument: $1"
        usage
        exit 1
        ;;
    esac
  done
}

parse_rolling_restart_args "$@"
# The inline loop this replaces consumed every argument; keep that
# post-condition for the rest of the script.
set --
# Print the value of HBase configuration key $1 as reported by HBaseConfTool,
# or the fallback $2 when the tool prints "null".
function rr_conf_value() {
  local value
  value=$("$bin"/hbase org.apache.hadoop.hbase.util.HBaseConfTool "$1")
  if [ "$value" == "null" ]; then
    value=$2
  fi
  printf '%s\n' "$value"
}

# Stop every master, wait for the master znode to disappear, start the
# masters again and wait for the regions-in-transition znode to drain.
# Globals read: bin, zparent, HBASE_CONF_DIR, HBASE_BACKUP_MASTERS,
#   START_CMD_DIST_MODE.
function rr_restart_masters() {
  local zmaster zunassigned unassigned

  # stop all masters before re-start to avoid races for master znode
  "$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" stop master
  "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
    --hosts "${HBASE_BACKUP_MASTERS}" stop master-backup

  # make sure the master znode has been deleted before continuing
  zmaster="$zparent/$(rr_conf_value zookeeper.znode.master master)"
  echo "Waiting for Master ZNode ${zmaster} to expire"
  # grep is deliberately not silenced: the matching line is echoed once the
  # znode is gone.
  while ! "$bin"/hbase zkcli stat "$zmaster" 2>&1 | grep "Node does not exist"; do
    echo -n "."
    sleep 1
  done
  echo # force a newline

  # all masters are down, now restart
  # START_CMD_DIST_MODE may hold "options... command" and must word-split.
  # shellcheck disable=SC2086
  "$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" ${START_CMD_DIST_MODE} master
  # shellcheck disable=SC2086
  "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
    --hosts "${HBASE_BACKUP_MASTERS}" ${START_CMD_DIST_MODE} master-backup

  echo "Wait a minute for master to come up join cluster"
  sleep 60

  # A master joining the cluster starts by cleaning out regions in transition.
  # Wait until it has finished before giving it a bunch of work to do; the
  # master is vulnerable during startup.
  zunassigned="$zparent/$(rr_conf_value zookeeper.znode.unassigned region-in-transition)"
  # Check whether the regions-in-transition znode exists at all.
  if "$bin"/hbase zkcli stat "${zunassigned}" 2>&1 | tail -1 \
    | grep "Node does not exist:" >/dev/null; then
    echo "Znode ${zunassigned} does not exist"
  else
    echo -n "Waiting for ${zunassigned} to empty"
    while true; do
      unassigned=$("$bin"/hbase zkcli stat "${zunassigned}" 2>&1 \
        | grep -e 'numChildren = ' | sed -e 's,numChildren = ,,')
      # An empty or non-numeric answer is treated as "not drained yet"; the
      # redirect hides the resulting "integer expression expected" noise.
      if [ "${unassigned}" -eq 0 ] 2>/dev/null; then
        echo
        break
      fi
      echo -n " ${unassigned}"
      sleep 1
    done
  fi
}

# Restart every regionserver through hbase-daemons.sh.
# Globals read: bin, HBASE_CONF_DIR, HBASE_REGIONSERVERS,
#   RESTART_CMD_REGIONSERVER. Exports HBASE_SLAVE_PARALLEL=false.
function rr_restart_regionservers() {
  # unlike the masters, roll all regionservers one-at-a-time
  export HBASE_SLAVE_PARALLEL=false
  # RESTART_CMD_REGIONSERVER may hold "options... command" and must word-split.
  # shellcheck disable=SC2086
  "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
    --hosts "${HBASE_REGIONSERVERS}" ${RESTART_CMD_REGIONSERVER} regionserver
}

# Gracefully restart every online regionserver via graceful_stop.sh, with the
# balancer switched off for the duration and restored afterwards.
# Globals read: bin, zparent, HBASE_CONF_DIR, RR_MAXTHREADS, RR_NOACK,
#   RR_MOVE_TIMEOUT. Global written: HBASE_BALANCER_STATE.
function rr_graceful_restart_regionservers() {
  local masterport zkrs online_regionservers rs hostname port
  local -a rs_parts stop_args

  masterport=$(rr_conf_value hbase.master.port 16000)
  zkrs="$zparent/$(rr_conf_value zookeeper.znode.rs rs)"
  # The last line of "zkcli ls" is taken as the bracketed list of entries.
  online_regionservers=$("$bin"/hbase zkcli ls "$zkrs" 2>&1 | tail -1 | sed "s/\[//" | sed "s/\]//")
  echo "Disabling load balancer"
  HBASE_BALANCER_STATE=$(echo 'balance_switch false' | "$bin"/hbase --config "${HBASE_CONF_DIR}" shell -n | tail -1)
  echo "Previous balancer state was $HBASE_BALANCER_STATE"

  # Build the argument list once. Optional flags are added only when they
  # have a value, so an unset RR_MOVE_TIMEOUT can no longer make
  # "--movetimeout" consume the hostname.
  stop_args=(--config "${HBASE_CONF_DIR}" --restart --reload -nob --maxthreads "${RR_MAXTHREADS}")
  if [ -n "${RR_NOACK:-}" ]; then
    stop_args+=("${RR_NOACK}")
  fi
  if [ -n "${RR_MOVE_TIMEOUT:-}" ]; then
    stop_args+=(--movetimeout "${RR_MOVE_TIMEOUT}")
  fi

  # Entries are whitespace-separated; each is comma-separated with the host
  # first and the port second.
  for rs in $online_regionservers; do
    IFS=',' read -r -a rs_parts <<< "$rs"
    hostname=${rs_parts[0]:-}
    port=${rs_parts[1]:-}
    if [ -z "$hostname" ] || [ -z "$port" ]; then
      echo "Skipping unparsable regionserver entry: $rs" >&2
      continue
    fi
    if [ "$port" -eq "$masterport" ]; then
      echo "Skipping regionserver on master machine $hostname:$port"
      continue
    fi
    echo "Gracefully restarting: $hostname"
    "$bin"/graceful_stop.sh "${stop_args[@]}" "$hostname"
    sleep 1
  done

  if [ "$HBASE_BALANCER_STATE" != "false" ]; then
    echo "Restoring balancer state to $HBASE_BALANCER_STATE"
    echo "balance_switch $HBASE_BALANCER_STATE" | "$bin"/hbase --config "${HBASE_CONF_DIR}" shell &> /dev/null
  fi
}

# Read whether the cluster is distributed from the HBase configuration.
# HBASE-6504 - only take the first line of the output in case verbose gc is on
distMode=$(HBASE_CONF_DIR="${HBASE_CONF_DIR}" "$bin"/hbase org.apache.hadoop.hbase.util.HBaseConfTool hbase.cluster.distributed | head -n 1)
if [ "$distMode" == 'false' ]; then
  # Standalone mode has a single process, so only a full restart makes sense.
  if [ "$RR_RS" -ne 1 ] || [ "$RR_MASTER" -ne 1 ]; then
    echo Cant do selective rolling restart if not running distributed
    exit 1
  fi
  # START_CMD_NON_DIST_MODE may hold "options... command" and must word-split.
  # shellcheck disable=SC2086
  "$bin"/hbase-daemon.sh ${START_CMD_NON_DIST_MODE} master
else
  zparent=$(rr_conf_value zookeeper.znode.parent /hbase)

  if [ "$RR_MASTER" -eq 1 ]; then
    rr_restart_masters
  fi

  if [ "$RR_RS" -eq 1 ]; then
    rr_restart_regionservers
  fi

  if [ "$RR_GRACEFUL" -eq 1 ]; then
    rr_graceful_restart_regionservers
  fi
fi