3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
19 package org
.apache
.hadoop
.hbase
.master
;
21 import static org
.junit
.Assert
.assertEquals
;
23 import java
.io
.IOException
;
24 import java
.util
.List
;
25 import java
.util
.NavigableSet
;
27 import java
.util
.TreeSet
;
29 import org
.apache
.commons
.logging
.Log
;
30 import org
.apache
.commons
.logging
.LogFactory
;
31 import org
.apache
.hadoop
.conf
.Configuration
;
32 import org
.apache
.hadoop
.hbase
.HBaseConfiguration
;
33 import org
.apache
.hadoop
.hbase
.HBaseTestingUtility
;
34 import org
.apache
.hadoop
.hbase
.HRegionInfo
;
35 import org
.apache
.hadoop
.hbase
.MiniHBaseCluster
;
36 import org
.apache
.hadoop
.hbase
.ServerName
;
37 import org
.apache
.hadoop
.hbase
.TableName
;
38 import org
.apache
.hadoop
.hbase
.client
.RegionLocator
;
39 import org
.apache
.hadoop
.hbase
.client
.Table
;
40 import org
.apache
.hadoop
.hbase
.shaded
.protobuf
.ProtobufUtil
;
41 import org
.apache
.hadoop
.hbase
.testclassification
.LargeTests
;
42 import org
.apache
.hadoop
.hbase
.testclassification
.MasterTests
;
43 import org
.apache
.hadoop
.hbase
.util
.Bytes
;
44 import org
.apache
.hadoop
.hbase
.util
.JVMClusterUtil
.MasterThread
;
45 import org
.apache
.hadoop
.hbase
.util
.JVMClusterUtil
.RegionServerThread
;
46 import org
.junit
.Rule
;
47 import org
.junit
.Test
;
48 import org
.junit
.experimental
.categories
.Category
;
49 import org
.junit
.rules
.TestName
;
/**
 * Tests the restarting of everything as done during rolling restarts.
 */
54 @Category({MasterTests
.class, LargeTests
.class})
55 public class TestRollingRestart
{
56 private static final Log LOG
= LogFactory
.getLog(TestRollingRestart
.class);
59 public TestName name
= new TestName();
61 @Test (timeout
=500000)
62 public void testBasicRollingRestart() throws Exception
{
64 // Start a cluster with 2 masters and 4 regionservers
65 final int NUM_MASTERS
= 2;
67 final int NUM_REGIONS_TO_CREATE
= 20;
69 int expectedNumRS
= 3;
72 log("Starting cluster");
73 Configuration conf
= HBaseConfiguration
.create();
74 HBaseTestingUtility TEST_UTIL
= new HBaseTestingUtility(conf
);
75 TEST_UTIL
.startMiniCluster(NUM_MASTERS
, NUM_RS
);
76 MiniHBaseCluster cluster
= TEST_UTIL
.getHBaseCluster();
77 log("Waiting for active/ready master");
78 cluster
.waitForActiveAndReadyMaster();
80 // Create a table with regions
81 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
82 byte [] family
= Bytes
.toBytes("family");
83 log("Creating table with " + NUM_REGIONS_TO_CREATE
+ " regions");
84 Table ht
= TEST_UTIL
.createMultiRegionTable(tableName
, family
, NUM_REGIONS_TO_CREATE
);
86 try (RegionLocator r
= TEST_UTIL
.getConnection().getRegionLocator(tableName
)) {
87 numRegions
= r
.getStartKeys().length
;
89 numRegions
+= 1; // catalogs
90 log("Waiting for no more RIT\n");
91 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
92 log("Disabling table\n");
93 TEST_UTIL
.getAdmin().disableTable(tableName
);
94 log("Waiting for no more RIT\n");
95 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
96 NavigableSet
<String
> regions
= HBaseTestingUtility
.getAllOnlineRegions(cluster
);
97 log("Verifying only catalog and namespace regions are assigned\n");
98 if (regions
.size() != 2) {
99 for (String oregion
: regions
) log("Region still online: " + oregion
);
101 assertEquals(2, regions
.size());
102 log("Enabling table\n");
103 TEST_UTIL
.getAdmin().enableTable(tableName
);
104 log("Waiting for no more RIT\n");
105 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
106 log("Verifying there are " + numRegions
+ " assigned on cluster\n");
107 regions
= HBaseTestingUtility
.getAllOnlineRegions(cluster
);
108 assertRegionsAssigned(cluster
, regions
);
109 assertEquals(expectedNumRS
, cluster
.getRegionServerThreads().size());
111 // Add a new regionserver
112 log("Adding a fourth RS");
113 RegionServerThread restarted
= cluster
.startRegionServer();
115 restarted
.waitForServerOnline();
116 log("Additional RS is online");
117 log("Waiting for no more RIT");
118 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
119 log("Verifying there are " + numRegions
+ " assigned on cluster");
120 assertRegionsAssigned(cluster
, regions
);
121 assertEquals(expectedNumRS
, cluster
.getRegionServerThreads().size());
124 List
<MasterThread
> masterThreads
= cluster
.getMasterThreads();
125 MasterThread activeMaster
= null;
126 MasterThread backupMaster
= null;
127 assertEquals(2, masterThreads
.size());
128 if (masterThreads
.get(0).getMaster().isActiveMaster()) {
129 activeMaster
= masterThreads
.get(0);
130 backupMaster
= masterThreads
.get(1);
132 activeMaster
= masterThreads
.get(1);
133 backupMaster
= masterThreads
.get(0);
136 // Bring down the backup master
137 log("Stopping backup master\n\n");
138 backupMaster
.getMaster().stop("Stop of backup during rolling restart");
139 cluster
.hbaseCluster
.waitOnMaster(backupMaster
);
141 // Bring down the primary master
142 log("Stopping primary master\n\n");
143 activeMaster
.getMaster().stop("Stop of active during rolling restart");
144 cluster
.hbaseCluster
.waitOnMaster(activeMaster
);
146 // Start primary master
147 log("Restarting primary master\n\n");
148 activeMaster
= cluster
.startMaster();
149 cluster
.waitForActiveAndReadyMaster();
151 // Start backup master
152 log("Restarting backup master\n\n");
153 backupMaster
= cluster
.startMaster();
155 assertEquals(expectedNumRS
, cluster
.getRegionServerThreads().size());
157 // RegionServer Restarts
159 // Bring them down, one at a time, waiting between each to complete
160 List
<RegionServerThread
> regionServers
=
161 cluster
.getLiveRegionServerThreads();
163 int total
= regionServers
.size();
164 for (RegionServerThread rst
: regionServers
) {
165 ServerName serverName
= rst
.getRegionServer().getServerName();
166 log("Stopping region server " + num
+ " of " + total
+ " [ " +
168 rst
.getRegionServer().stop("Stopping RS during rolling restart");
169 cluster
.hbaseCluster
.waitOnRegionServer(rst
);
170 log("Waiting for RS shutdown to be handled by master");
171 waitForRSShutdownToStartAndFinish(activeMaster
, serverName
);
172 log("RS shutdown done, waiting for no more RIT");
173 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
174 log("Verifying there are " + numRegions
+ " assigned on cluster");
175 assertRegionsAssigned(cluster
, regions
);
177 assertEquals(expectedNumRS
, cluster
.getRegionServerThreads().size());
178 log("Restarting region server " + num
+ " of " + total
);
179 restarted
= cluster
.startRegionServer();
180 restarted
.waitForServerOnline();
182 log("Region server " + num
+ " is back online");
183 log("Waiting for no more RIT");
184 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
185 log("Verifying there are " + numRegions
+ " assigned on cluster");
186 assertRegionsAssigned(cluster
, regions
);
187 assertEquals(expectedNumRS
, cluster
.getRegionServerThreads().size());
191 assertRegionsAssigned(cluster
, regions
);
193 // TODO: Bring random 3 of 4 RS down at the same time
197 TEST_UTIL
.shutdownMiniCluster();
200 private void waitForRSShutdownToStartAndFinish(MasterThread activeMaster
,
201 ServerName serverName
) throws InterruptedException
{
202 ServerManager sm
= activeMaster
.getMaster().getServerManager();
203 // First wait for it to be in dead list
204 while (!sm
.getDeadServers().isDeadServer(serverName
)) {
205 log("Waiting for [" + serverName
+ "] to be listed as dead in master");
208 log("Server [" + serverName
+ "] marked as dead, waiting for it to " +
209 "finish dead processing");
210 while (sm
.areDeadServersInProgress()) {
211 log("Server [" + serverName
+ "] still being processed, waiting");
214 log("Server [" + serverName
+ "] done with server shutdown processing");
217 private void log(String msg
) {
218 LOG
.debug("\n\nTRR: " + msg
+ "\n");
221 private int getNumberOfOnlineRegions(MiniHBaseCluster cluster
) {
223 for (RegionServerThread rst
: cluster
.getLiveRegionServerThreads()) {
224 numFound
+= rst
.getRegionServer().getNumberOfOnlineRegions();
226 for (MasterThread mt
: cluster
.getMasterThreads()) {
227 numFound
+= mt
.getMaster().getNumberOfOnlineRegions();
232 private void assertRegionsAssigned(MiniHBaseCluster cluster
,
233 Set
<String
> expectedRegions
) throws IOException
{
234 int numFound
= getNumberOfOnlineRegions(cluster
);
235 if (expectedRegions
.size() > numFound
) {
236 log("Expected to find " + expectedRegions
.size() + " but only found"
238 NavigableSet
<String
> foundRegions
=
239 HBaseTestingUtility
.getAllOnlineRegions(cluster
);
240 for (String region
: expectedRegions
) {
241 if (!foundRegions
.contains(region
)) {
242 log("Missing region: " + region
);
245 assertEquals(expectedRegions
.size(), numFound
);
246 } else if (expectedRegions
.size() < numFound
) {
247 int doubled
= numFound
- expectedRegions
.size();
248 log("Expected to find " + expectedRegions
.size() + " but found"
249 + " " + numFound
+ " (" + doubled
+ " double assignments?)");
250 NavigableSet
<String
> doubleRegions
= getDoubleAssignedRegions(cluster
);
251 for (String region
: doubleRegions
) {
252 log("Region is double assigned: " + region
);
254 assertEquals(expectedRegions
.size(), numFound
);
256 log("Success! Found expected number of " + numFound
+ " regions");
260 private NavigableSet
<String
> getDoubleAssignedRegions(
261 MiniHBaseCluster cluster
) throws IOException
{
262 NavigableSet
<String
> online
= new TreeSet
<>();
263 NavigableSet
<String
> doubled
= new TreeSet
<>();
264 for (RegionServerThread rst
: cluster
.getLiveRegionServerThreads()) {
265 for (HRegionInfo region
: ProtobufUtil
.getOnlineRegions(
266 rst
.getRegionServer().getRSRpcServices())) {
267 if(!online
.add(region
.getRegionNameAsString())) {
268 doubled
.add(region
.getRegionNameAsString());