HBASE-17532 Replaced explicit type with diamond operator
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / master / TestRollingRestart.java
blob80c6f3a875c710af807d2072540021e781354f80
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 package org.apache.hadoop.hbase.master;
21 import static org.junit.Assert.assertEquals;
23 import java.io.IOException;
24 import java.util.List;
25 import java.util.NavigableSet;
26 import java.util.Set;
27 import java.util.TreeSet;
29 import org.apache.commons.logging.Log;
30 import org.apache.commons.logging.LogFactory;
31 import org.apache.hadoop.conf.Configuration;
32 import org.apache.hadoop.hbase.HBaseConfiguration;
33 import org.apache.hadoop.hbase.HBaseTestingUtility;
34 import org.apache.hadoop.hbase.HRegionInfo;
35 import org.apache.hadoop.hbase.MiniHBaseCluster;
36 import org.apache.hadoop.hbase.ServerName;
37 import org.apache.hadoop.hbase.TableName;
38 import org.apache.hadoop.hbase.client.RegionLocator;
39 import org.apache.hadoop.hbase.client.Table;
40 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
41 import org.apache.hadoop.hbase.testclassification.LargeTests;
42 import org.apache.hadoop.hbase.testclassification.MasterTests;
43 import org.apache.hadoop.hbase.util.Bytes;
44 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
45 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
46 import org.junit.Rule;
47 import org.junit.Test;
48 import org.junit.experimental.categories.Category;
49 import org.junit.rules.TestName;
/**
 * Tests the restarting of everything as done during rolling restarts.
 */
54 @Category({MasterTests.class, LargeTests.class})
55 public class TestRollingRestart {
56 private static final Log LOG = LogFactory.getLog(TestRollingRestart.class);
58 @Rule
59 public TestName name = new TestName();
61 @Test (timeout=500000)
62 public void testBasicRollingRestart() throws Exception {
64 // Start a cluster with 2 masters and 4 regionservers
65 final int NUM_MASTERS = 2;
66 final int NUM_RS = 3;
67 final int NUM_REGIONS_TO_CREATE = 20;
69 int expectedNumRS = 3;
71 // Start the cluster
72 log("Starting cluster");
73 Configuration conf = HBaseConfiguration.create();
74 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
75 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
76 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
77 log("Waiting for active/ready master");
78 cluster.waitForActiveAndReadyMaster();
80 // Create a table with regions
81 final TableName tableName = TableName.valueOf(name.getMethodName());
82 byte [] family = Bytes.toBytes("family");
83 log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
84 Table ht = TEST_UTIL.createMultiRegionTable(tableName, family, NUM_REGIONS_TO_CREATE);
85 int numRegions = -1;
86 try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
87 numRegions = r.getStartKeys().length;
89 numRegions += 1; // catalogs
90 log("Waiting for no more RIT\n");
91 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
92 log("Disabling table\n");
93 TEST_UTIL.getAdmin().disableTable(tableName);
94 log("Waiting for no more RIT\n");
95 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
96 NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
97 log("Verifying only catalog and namespace regions are assigned\n");
98 if (regions.size() != 2) {
99 for (String oregion : regions) log("Region still online: " + oregion);
101 assertEquals(2, regions.size());
102 log("Enabling table\n");
103 TEST_UTIL.getAdmin().enableTable(tableName);
104 log("Waiting for no more RIT\n");
105 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
106 log("Verifying there are " + numRegions + " assigned on cluster\n");
107 regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
108 assertRegionsAssigned(cluster, regions);
109 assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
111 // Add a new regionserver
112 log("Adding a fourth RS");
113 RegionServerThread restarted = cluster.startRegionServer();
114 expectedNumRS++;
115 restarted.waitForServerOnline();
116 log("Additional RS is online");
117 log("Waiting for no more RIT");
118 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
119 log("Verifying there are " + numRegions + " assigned on cluster");
120 assertRegionsAssigned(cluster, regions);
121 assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
123 // Master Restarts
124 List<MasterThread> masterThreads = cluster.getMasterThreads();
125 MasterThread activeMaster = null;
126 MasterThread backupMaster = null;
127 assertEquals(2, masterThreads.size());
128 if (masterThreads.get(0).getMaster().isActiveMaster()) {
129 activeMaster = masterThreads.get(0);
130 backupMaster = masterThreads.get(1);
131 } else {
132 activeMaster = masterThreads.get(1);
133 backupMaster = masterThreads.get(0);
136 // Bring down the backup master
137 log("Stopping backup master\n\n");
138 backupMaster.getMaster().stop("Stop of backup during rolling restart");
139 cluster.hbaseCluster.waitOnMaster(backupMaster);
141 // Bring down the primary master
142 log("Stopping primary master\n\n");
143 activeMaster.getMaster().stop("Stop of active during rolling restart");
144 cluster.hbaseCluster.waitOnMaster(activeMaster);
146 // Start primary master
147 log("Restarting primary master\n\n");
148 activeMaster = cluster.startMaster();
149 cluster.waitForActiveAndReadyMaster();
151 // Start backup master
152 log("Restarting backup master\n\n");
153 backupMaster = cluster.startMaster();
155 assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
157 // RegionServer Restarts
159 // Bring them down, one at a time, waiting between each to complete
160 List<RegionServerThread> regionServers =
161 cluster.getLiveRegionServerThreads();
162 int num = 1;
163 int total = regionServers.size();
164 for (RegionServerThread rst : regionServers) {
165 ServerName serverName = rst.getRegionServer().getServerName();
166 log("Stopping region server " + num + " of " + total + " [ " +
167 serverName + "]");
168 rst.getRegionServer().stop("Stopping RS during rolling restart");
169 cluster.hbaseCluster.waitOnRegionServer(rst);
170 log("Waiting for RS shutdown to be handled by master");
171 waitForRSShutdownToStartAndFinish(activeMaster, serverName);
172 log("RS shutdown done, waiting for no more RIT");
173 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
174 log("Verifying there are " + numRegions + " assigned on cluster");
175 assertRegionsAssigned(cluster, regions);
176 expectedNumRS--;
177 assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
178 log("Restarting region server " + num + " of " + total);
179 restarted = cluster.startRegionServer();
180 restarted.waitForServerOnline();
181 expectedNumRS++;
182 log("Region server " + num + " is back online");
183 log("Waiting for no more RIT");
184 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
185 log("Verifying there are " + numRegions + " assigned on cluster");
186 assertRegionsAssigned(cluster, regions);
187 assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
188 num++;
190 Thread.sleep(1000);
191 assertRegionsAssigned(cluster, regions);
193 // TODO: Bring random 3 of 4 RS down at the same time
195 ht.close();
196 // Stop the cluster
197 TEST_UTIL.shutdownMiniCluster();
200 private void waitForRSShutdownToStartAndFinish(MasterThread activeMaster,
201 ServerName serverName) throws InterruptedException {
202 ServerManager sm = activeMaster.getMaster().getServerManager();
203 // First wait for it to be in dead list
204 while (!sm.getDeadServers().isDeadServer(serverName)) {
205 log("Waiting for [" + serverName + "] to be listed as dead in master");
206 Thread.sleep(1);
208 log("Server [" + serverName + "] marked as dead, waiting for it to " +
209 "finish dead processing");
210 while (sm.areDeadServersInProgress()) {
211 log("Server [" + serverName + "] still being processed, waiting");
212 Thread.sleep(100);
214 log("Server [" + serverName + "] done with server shutdown processing");
217 private void log(String msg) {
218 LOG.debug("\n\nTRR: " + msg + "\n");
221 private int getNumberOfOnlineRegions(MiniHBaseCluster cluster) {
222 int numFound = 0;
223 for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
224 numFound += rst.getRegionServer().getNumberOfOnlineRegions();
226 for (MasterThread mt : cluster.getMasterThreads()) {
227 numFound += mt.getMaster().getNumberOfOnlineRegions();
229 return numFound;
232 private void assertRegionsAssigned(MiniHBaseCluster cluster,
233 Set<String> expectedRegions) throws IOException {
234 int numFound = getNumberOfOnlineRegions(cluster);
235 if (expectedRegions.size() > numFound) {
236 log("Expected to find " + expectedRegions.size() + " but only found"
237 + " " + numFound);
238 NavigableSet<String> foundRegions =
239 HBaseTestingUtility.getAllOnlineRegions(cluster);
240 for (String region : expectedRegions) {
241 if (!foundRegions.contains(region)) {
242 log("Missing region: " + region);
245 assertEquals(expectedRegions.size(), numFound);
246 } else if (expectedRegions.size() < numFound) {
247 int doubled = numFound - expectedRegions.size();
248 log("Expected to find " + expectedRegions.size() + " but found"
249 + " " + numFound + " (" + doubled + " double assignments?)");
250 NavigableSet<String> doubleRegions = getDoubleAssignedRegions(cluster);
251 for (String region : doubleRegions) {
252 log("Region is double assigned: " + region);
254 assertEquals(expectedRegions.size(), numFound);
255 } else {
256 log("Success! Found expected number of " + numFound + " regions");
260 private NavigableSet<String> getDoubleAssignedRegions(
261 MiniHBaseCluster cluster) throws IOException {
262 NavigableSet<String> online = new TreeSet<>();
263 NavigableSet<String> doubled = new TreeSet<>();
264 for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
265 for (HRegionInfo region : ProtobufUtil.getOnlineRegions(
266 rst.getRegionServer().getRSRpcServices())) {
267 if(!online.add(region.getRegionNameAsString())) {
268 doubled.add(region.getRegionNameAsString());
272 return doubled;