2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
.master
;
20 import static org
.junit
.Assert
.assertEquals
;
22 import java
.io
.IOException
;
23 import java
.util
.Arrays
;
24 import java
.util
.Collection
;
25 import java
.util
.List
;
26 import java
.util
.NavigableSet
;
28 import java
.util
.TreeSet
;
29 import org
.apache
.hadoop
.conf
.Configuration
;
30 import org
.apache
.hadoop
.hbase
.HBaseClassTestRule
;
31 import org
.apache
.hadoop
.hbase
.HBaseConfiguration
;
32 import org
.apache
.hadoop
.hbase
.HBaseTestingUtility
;
33 import org
.apache
.hadoop
.hbase
.HConstants
;
34 import org
.apache
.hadoop
.hbase
.MiniHBaseCluster
;
35 import org
.apache
.hadoop
.hbase
.ServerName
;
36 import org
.apache
.hadoop
.hbase
.StartMiniClusterOption
;
37 import org
.apache
.hadoop
.hbase
.TableName
;
38 import org
.apache
.hadoop
.hbase
.client
.RegionInfo
;
39 import org
.apache
.hadoop
.hbase
.client
.RegionLocator
;
40 import org
.apache
.hadoop
.hbase
.client
.Table
;
41 import org
.apache
.hadoop
.hbase
.testclassification
.LargeTests
;
42 import org
.apache
.hadoop
.hbase
.testclassification
.MasterTests
;
43 import org
.apache
.hadoop
.hbase
.util
.Bytes
;
44 import org
.apache
.hadoop
.hbase
.util
.JVMClusterUtil
.MasterThread
;
45 import org
.apache
.hadoop
.hbase
.util
.JVMClusterUtil
.RegionServerThread
;
46 import org
.junit
.ClassRule
;
47 import org
.junit
.Rule
;
48 import org
.junit
.Test
;
49 import org
.junit
.experimental
.categories
.Category
;
50 import org
.junit
.rules
.TestName
;
51 import org
.junit
.runner
.RunWith
;
52 import org
.junit
.runners
.Parameterized
;
53 import org
.slf4j
.Logger
;
54 import org
.slf4j
.LoggerFactory
;
56 import org
.apache
.hadoop
.hbase
.shaded
.protobuf
.ProtobufUtil
;
59 * Tests the restarting of everything as done during rolling restarts.
61 @RunWith(Parameterized
.class)
62 @Category({MasterTests
.class, LargeTests
.class})
63 public class TestRollingRestart
{
// HBase class-level test rule, built for this class via HBaseClassTestRule.forClass.
// NOTE(review): no @ClassRule annotation is visible on this field in this view although
// org.junit.ClassRule is imported -- confirm it is present in the complete file.
66 public static final HBaseClassTestRule CLASS_RULE
=
67 HBaseClassTestRule
.forClass(TestRollingRestart
.class);
// SLF4J logger for this class; the log(String) helper writes to it at debug level.
69 private static final Logger LOG
= LoggerFactory
.getLogger(TestRollingRestart
.class);
// Test-name holder; testBasicRollingRestart derives its table name from getMethodName().
// NOTE(review): no @Rule annotation is visible on this field in this view although
// org.junit.Rule is imported -- confirm it is present in the complete file.
72 public TestName name
= new TestName();
// Parameterized field; testBasicRollingRestart copies it into the configuration key
// HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK before starting the cluster.
74 @Parameterized.Parameter
75 public boolean splitWALCoordinatedByZK
;
// Exercises a rolling restart of a mini cluster end to end: starts the cluster, creates a
// multi-region table, disables and re-enables it, adds a region server, stops and restarts
// both masters, then stops and restarts every live region server one at a time. Region
// assignment is re-checked with assertRegionsAssigned(cluster, regions) after the table is
// re-enabled, after the extra region server joins, and after each region server stop/restart.
// NOTE(review): several identifiers used below (NUM_RS, numRegions, num) have no visible
// declaration in this view, and the closing braces / one "else" are not visible either --
// confirm against the complete file before changing any code here.
78 public void testBasicRollingRestart() throws Exception
{
80 // Start a cluster with 2 masters; expectedNumRS starts at 3 and a fourth RS is added later
81 final int NUM_MASTERS
= 2;
83 final int NUM_REGIONS_TO_CREATE
= 20;
85 int expectedNumRS
= 3;
// Build a configuration carrying the parameterized WAL-split coordination flag and
// start the mini cluster with it.
88 log("Starting cluster");
89 Configuration conf
= HBaseConfiguration
.create();
90 conf
.setBoolean(HConstants
.HBASE_SPLIT_WAL_COORDINATED_BY_ZK
,
91 splitWALCoordinatedByZK
);
92 HBaseTestingUtility TEST_UTIL
= new HBaseTestingUtility(conf
);
// NUM_RS sizes both the region servers and the data nodes.
// NOTE(review): its declaration is not visible in this view.
93 StartMiniClusterOption option
= StartMiniClusterOption
.builder()
94 .numMasters(NUM_MASTERS
).numRegionServers(NUM_RS
).numDataNodes(NUM_RS
).build();
95 TEST_UTIL
.startMiniCluster(option
);
96 MiniHBaseCluster cluster
= TEST_UTIL
.getHBaseCluster();
97 log("Waiting for active/ready master");
98 cluster
.waitForActiveAndReadyMaster();
100 // Create a table with regions
// The table name is the test method name with every '[', '|' and ']' replaced by '-'
// (presumably because parameterized test names carry a bracketed suffix -- confirm).
101 final TableName tableName
=
102 TableName
.valueOf(name
.getMethodName().replaceAll("[\\[|\\]]", "-"));
103 byte [] family
= Bytes
.toBytes("family");
104 log("Creating table with " + NUM_REGIONS_TO_CREATE
+ " regions");
105 Table ht
= TEST_UTIL
.createMultiRegionTable(tableName
, family
, NUM_REGIONS_TO_CREATE
);
// Count the table's regions from its start keys; the locator is closed by
// try-with-resources. NOTE(review): the declaration of numRegions is not visible here.
107 try (RegionLocator r
= TEST_UTIL
.getConnection().getRegionLocator(tableName
)) {
108 numRegions
= r
.getStartKeys().length
;
110 numRegions
+= 1; // catalogs
111 log("Waiting for no more RIT\n");
112 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
// Disable the table and check that exactly one region (the catalog) remains online.
113 log("Disabling table\n");
114 TEST_UTIL
.getAdmin().disableTable(tableName
);
115 log("Waiting for no more RIT\n");
116 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
117 NavigableSet
<String
> regions
= HBaseTestingUtility
.getAllOnlineRegions(cluster
);
118 log("Verifying only catalog region is assigned\n");
// On a mismatch, log every region still online before the assertion below fails.
119 if (regions
.size() != 1) {
120 for (String oregion
: regions
) {
121 log("Region still online: " + oregion
);
124 assertEquals(1, regions
.size());
// Re-enable the table; "regions" is re-read here and is the expected set passed to
// every later assertRegionsAssigned call.
125 log("Enabling table\n");
126 TEST_UTIL
.getAdmin().enableTable(tableName
);
127 log("Waiting for no more RIT\n");
128 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
129 log("Verifying there are " + numRegions
+ " assigned on cluster\n");
130 regions
= HBaseTestingUtility
.getAllOnlineRegions(cluster
);
131 assertRegionsAssigned(cluster
, regions
);
132 assertEquals(expectedNumRS
, cluster
.getRegionServerThreads().size());
134 // Add a new regionserver
135 log("Adding a fourth RS");
136 RegionServerThread restarted
= cluster
.startRegionServer();
138 restarted
.waitForServerOnline();
139 log("Additional RS is online");
140 log("Waiting for no more RIT");
141 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
142 log("Verifying there are " + numRegions
+ " assigned on cluster");
143 assertRegionsAssigned(cluster
, regions
);
144 assertEquals(expectedNumRS
, cluster
.getRegionServerThreads().size());
// Work out which of the two master threads is currently active and which is the backup,
// based on whether the first thread's master reports itself as active.
147 List
<MasterThread
> masterThreads
= cluster
.getMasterThreads();
148 MasterThread activeMaster
= null;
149 MasterThread backupMaster
= null;
150 assertEquals(2, masterThreads
.size());
151 if (masterThreads
.get(0).getMaster().isActiveMaster()) {
152 activeMaster
= masterThreads
.get(0);
153 backupMaster
= masterThreads
.get(1);
155 activeMaster
= masterThreads
.get(1);
156 backupMaster
= masterThreads
.get(0);
159 // Bring down the backup master
160 log("Stopping backup master\n\n");
161 backupMaster
.getMaster().stop("Stop of backup during rolling restart");
162 cluster
.hbaseCluster
.waitOnMaster(backupMaster
);
164 // Bring down the primary master
165 log("Stopping primary master\n\n");
166 activeMaster
.getMaster().stop("Stop of active during rolling restart");
167 cluster
.hbaseCluster
.waitOnMaster(activeMaster
);
169 // Start primary master
// activeMaster is rebound to the newly started master; it is the one later handed to
// waitForRSShutdownToStartAndFinish.
170 log("Restarting primary master\n\n");
171 activeMaster
= cluster
.startMaster();
172 cluster
.waitForActiveAndReadyMaster();
174 // Start backup master
175 log("Restarting backup master\n\n");
176 backupMaster
= cluster
.startMaster();
178 assertEquals(expectedNumRS
, cluster
.getRegionServerThreads().size());
180 // RegionServer Restarts
182 // Bring them down, one at a time, waiting between each to complete
183 List
<RegionServerThread
> regionServers
=
184 cluster
.getLiveRegionServerThreads();
186 int total
= regionServers
.size();
187 for (RegionServerThread rst
: regionServers
) {
188 ServerName serverName
= rst
.getRegionServer().getServerName();
// NOTE(review): "num" has no visible declaration or increment in this view.
189 log("Stopping region server " + num
+ " of " + total
+ " [ " +
// NOTE(review): the continuation of the log(...) call above is not visible in this view.
191 rst
.getRegionServer().stop("Stopping RS during rolling restart");
192 cluster
.hbaseCluster
.waitOnRegionServer(rst
);
// Block until the master has listed this server as dead and finished processing it.
193 log("Waiting for RS shutdown to be handled by master");
194 waitForRSShutdownToStartAndFinish(activeMaster
, serverName
);
195 log("RS shutdown done, waiting for no more RIT");
196 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
197 log("Verifying there are " + numRegions
+ " assigned on cluster");
198 assertRegionsAssigned(cluster
, regions
);
200 assertEquals(expectedNumRS
, cluster
.getRegionServerThreads().size());
// Start a replacement region server and re-verify assignment once it is online.
201 log("Restarting region server " + num
+ " of " + total
);
202 restarted
= cluster
.startRegionServer();
203 restarted
.waitForServerOnline();
205 log("Region server " + num
+ " is back online");
206 log("Waiting for no more RIT");
207 TEST_UTIL
.waitUntilNoRegionsInTransition(60000);
208 log("Verifying there are " + numRegions
+ " assigned on cluster");
209 assertRegionsAssigned(cluster
, regions
);
210 assertEquals(expectedNumRS
, cluster
.getRegionServerThreads().size());
// Final assignment check after the region server loop.
214 assertRegionsAssigned(cluster
, regions
);
216 // TODO: Bring random 3 of 4 RS down at the same time
// Tear the mini cluster down.
220 TEST_UTIL
.shutdownMiniCluster();
223 private void waitForRSShutdownToStartAndFinish(MasterThread activeMaster
,
224 ServerName serverName
) throws InterruptedException
{
225 ServerManager sm
= activeMaster
.getMaster().getServerManager();
226 // First wait for it to be in dead list
227 while (!sm
.getDeadServers().isDeadServer(serverName
)) {
228 log("Waiting for [" + serverName
+ "] to be listed as dead in master");
231 log("Server [" + serverName
+ "] marked as dead, waiting for it to " +
232 "finish dead processing");
233 while (sm
.areDeadServersInProgress()) {
234 log("Server [" + serverName
+ "] still being processed, waiting");
237 log("Server [" + serverName
+ "] done with server shutdown processing");
240 private void log(String msg
) {
241 LOG
.debug("\n\nTRR: " + msg
+ "\n");
244 private int getNumberOfOnlineRegions(MiniHBaseCluster cluster
) {
246 for (RegionServerThread rst
: cluster
.getLiveRegionServerThreads()) {
247 numFound
+= rst
.getRegionServer().getNumberOfOnlineRegions();
249 for (MasterThread mt
: cluster
.getMasterThreads()) {
250 numFound
+= mt
.getMaster().getNumberOfOnlineRegions();
255 private void assertRegionsAssigned(MiniHBaseCluster cluster
,
256 Set
<String
> expectedRegions
) throws IOException
{
257 int numFound
= getNumberOfOnlineRegions(cluster
);
258 if (expectedRegions
.size() > numFound
) {
259 log("Expected to find " + expectedRegions
.size() + " but only found"
261 NavigableSet
<String
> foundRegions
=
262 HBaseTestingUtility
.getAllOnlineRegions(cluster
);
263 for (String region
: expectedRegions
) {
264 if (!foundRegions
.contains(region
)) {
265 log("Missing region: " + region
);
268 assertEquals(expectedRegions
.size(), numFound
);
269 } else if (expectedRegions
.size() < numFound
) {
270 int doubled
= numFound
- expectedRegions
.size();
271 log("Expected to find " + expectedRegions
.size() + " but found"
272 + " " + numFound
+ " (" + doubled
+ " double assignments?)");
273 NavigableSet
<String
> doubleRegions
= getDoubleAssignedRegions(cluster
);
274 for (String region
: doubleRegions
) {
275 log("Region is double assigned: " + region
);
277 assertEquals(expectedRegions
.size(), numFound
);
279 log("Success! Found expected number of " + numFound
+ " regions");
283 private NavigableSet
<String
> getDoubleAssignedRegions(
284 MiniHBaseCluster cluster
) throws IOException
{
285 NavigableSet
<String
> online
= new TreeSet
<>();
286 NavigableSet
<String
> doubled
= new TreeSet
<>();
287 for (RegionServerThread rst
: cluster
.getLiveRegionServerThreads()) {
288 for (RegionInfo region
: ProtobufUtil
.getOnlineRegions(
289 rst
.getRegionServer().getRSRpcServices())) {
290 if(!online
.add(region
.getRegionNameAsString())) {
291 doubled
.add(region
.getRegionNameAsString());
299 @Parameterized.Parameters
300 public static Collection
coordinatedByZK() {
301 return Arrays
.asList(false, true);