HBASE-24033 Add ut for loading the corrupt recovered hfiles (#1322)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / TestRegionRebalancing.java
blob170182ed6a6f2590a5209f40213eb891a4c177e2
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.FlakeyTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
55 /**
56 * Test whether region re-balancing works. (HBASE-71)
57 * The test only works for cluster wide balancing, not per table wide.
58 * Increase the margin a little to make StochasticLoadBalancer result acceptable.
60 @Category({FlakeyTests.class, LargeTests.class})
61 @RunWith(value = Parameterized.class)
62 public class TestRegionRebalancing {
64 @ClassRule
65 public static final HBaseClassTestRule CLASS_RULE =
66 HBaseClassTestRule.forClass(TestRegionRebalancing.class);
68 @Parameters
69 public static Collection<Object[]> data() {
70 Object[][] balancers =
71 new String[][] { { "org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer" },
72 { "org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer" } };
73 return Arrays.asList(balancers);
/** Name of the single column family of the test table. */
private static final byte[] FAMILY_NAME = Bytes.toBytes("col");
private static final Logger LOG = LoggerFactory.getLogger(TestRegionRebalancing.class);
/** Testing utility owned by this test instance; it is started in before() and shut down in after(). */
private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
/** Locator for the test table; opened and closed by the test method. */
private RegionLocator regionLocator;
/** Descriptor of the test table; built in before(). */
private TableDescriptorBuilder.ModifyableTableDescriptor tableDescriptor;
/** Class name of the load balancer under test; set once from the test parameter. */
private final String balancerName;

/**
 * Creates a test instance for one load balancer implementation.
 * @param balancerName fully qualified class name of the load balancer to configure
 */
public TestRegionRebalancing(String balancerName) {
  this.balancerName = balancerName;
}
88 @After
89 public void after() throws Exception {
90 UTIL.shutdownMiniCluster();
93 @Before
94 public void before() throws Exception {
95 UTIL.getConfiguration().set("hbase.master.loadbalancer.class", this.balancerName);
96 // set minCostNeedBalance to 0, make sure balancer run
97 UTIL.startMiniCluster(1);
99 this.tableDescriptor = new TableDescriptorBuilder.ModifyableTableDescriptor(
100 TableName.valueOf("test"));
101 this.tableDescriptor.setColumnFamily(
102 new ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor(FAMILY_NAME));
106 * For HBASE-71. Try a few different configurations of starting and stopping
107 * region servers to see if the assignment or regions is pretty balanced.
108 * @throws IOException
109 * @throws InterruptedException
111 @Test
112 public void testRebalanceOnRegionServerNumberChange()
113 throws IOException, InterruptedException {
114 try(Connection connection = ConnectionFactory.createConnection(UTIL.getConfiguration());
115 Admin admin = connection.getAdmin()) {
116 admin.createTable(this.tableDescriptor, Arrays.copyOfRange(HBaseTestingUtility.KEYS,
117 1, HBaseTestingUtility.KEYS.length));
118 this.regionLocator = connection.getRegionLocator(this.tableDescriptor.getTableName());
120 MetaTableAccessor.fullScanMetaAndPrint(admin.getConnection());
122 assertEquals("Test table should have right number of regions",
123 HBaseTestingUtility.KEYS.length,
124 this.regionLocator.getStartKeys().length);
126 // verify that the region assignments are balanced to start out
127 assertRegionsAreBalanced();
129 // add a region server - total of 2
130 LOG.info("Started second server=" +
131 UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
132 UTIL.getHBaseCluster().getMaster().balance();
133 assertRegionsAreBalanced();
135 // On a balanced cluster, calling balance() should return true
136 assert(UTIL.getHBaseCluster().getMaster().balance() == true);
138 // if we add a server, then the balance() call should return true
139 // add a region server - total of 3
140 LOG.info("Started third server=" +
141 UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
142 waitForAllRegionsAssigned();
143 assert(UTIL.getHBaseCluster().getMaster().balance() == true);
144 assertRegionsAreBalanced();
146 // kill a region server - total of 2
147 LOG.info("Stopped third server=" + UTIL.getHBaseCluster().stopRegionServer(2, false));
148 UTIL.getHBaseCluster().waitOnRegionServer(2);
149 waitOnCrashProcessing();
150 UTIL.getHBaseCluster().getMaster().balance();
151 assertRegionsAreBalanced();
153 // start two more region servers - total of 4
154 LOG.info("Readding third server=" +
155 UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
156 LOG.info("Added fourth server=" +
157 UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
158 waitOnCrashProcessing();
159 waitForAllRegionsAssigned();
160 assert(UTIL.getHBaseCluster().getMaster().balance() == true);
161 assertRegionsAreBalanced();
162 for (int i = 0; i < 6; i++){
163 LOG.info("Adding " + (i + 5) + "th region server");
164 UTIL.getHBaseCluster().startRegionServer();
166 waitForAllRegionsAssigned();
167 assert(UTIL.getHBaseCluster().getMaster().balance() == true);
168 assertRegionsAreBalanced();
169 regionLocator.close();
174 * Wait on crash processing. Balancer won't run if processing a crashed server.
176 private void waitOnCrashProcessing() {
177 while (UTIL.getHBaseCluster().getMaster().getServerManager().areDeadServersInProgress()) {
178 LOG.info("Waiting on processing of crashed server before proceeding...");
179 Threads.sleep(1000);
184 * Determine if regions are balanced. Figure out the total, divide by the
185 * number of online servers, then test if each server is +/- 1 of average
186 * rounded up.
188 private void assertRegionsAreBalanced() throws IOException {
189 // TODO: Fix this test. Old balancer used to run with 'slop'. New
190 // balancer does not.
191 boolean success = false;
192 float slop = (float)UTIL.getConfiguration().getFloat("hbase.regions.slop", 0.1f);
193 if (slop <= 0) slop = 1;
195 for (int i = 0; i < 5; i++) {
196 success = true;
197 // make sure all the regions are reassigned before we test balance
198 waitForAllRegionsAssigned();
200 long regionCount = UTIL.getMiniHBaseCluster().countServedRegions();
201 List<HRegionServer> servers = getOnlineRegionServers();
202 double avg = (double)regionCount / (double)servers.size();
203 int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
204 int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
205 // Increase the margin a little to accommodate StochasticLoadBalancer
206 if (this.balancerName.contains("StochasticLoadBalancer")) {
207 avgLoadPlusSlop++;
208 avgLoadMinusSlop--;
210 LOG.debug("There are " + servers.size() + " servers and " + regionCount
211 + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
212 + ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
214 for (HRegionServer server : servers) {
215 int serverLoad =
216 ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
217 LOG.debug(server.getServerName() + " Avg: " + avg + " actual: " + serverLoad);
218 if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
219 && serverLoad >= avgLoadMinusSlop)) {
220 for (RegionInfo hri :
221 ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
222 if (hri.isMetaRegion()) serverLoad--;
223 // LOG.debug(hri.getRegionNameAsString());
225 if (!(serverLoad <= avgLoadPlusSlop && serverLoad >= avgLoadMinusSlop)) {
226 LOG.debug(server.getServerName() + " Isn't balanced!!! Avg: " + avg +
227 " actual: " + serverLoad + " slop: " + slop);
228 success = false;
229 break;
234 if (!success) {
235 // one or more servers are not balanced. sleep a little to give it a
236 // chance to catch up. then, go back to the retry loop.
237 try {
238 Thread.sleep(10000);
239 } catch (InterruptedException e) {}
241 UTIL.getHBaseCluster().getMaster().balance();
242 continue;
245 // if we get here, all servers were balanced, so we should just return.
246 return;
248 // if we get here, we tried 5 times and never got to short circuit out of
249 // the retry loop, so this is a failure.
250 fail("After 5 attempts, region assignments were not balanced.");
253 private List<HRegionServer> getOnlineRegionServers() {
254 List<HRegionServer> list = new ArrayList<>();
255 for (JVMClusterUtil.RegionServerThread rst :
256 UTIL.getHBaseCluster().getRegionServerThreads()) {
257 if (rst.getRegionServer().isOnline()) {
258 list.add(rst.getRegionServer());
261 return list;
265 * Wait until all the regions are assigned.
267 private void waitForAllRegionsAssigned() throws IOException {
268 int totalRegions = HBaseTestingUtility.KEYS.length;
269 try {
270 Thread.sleep(200);
271 } catch (InterruptedException e) {
272 throw new InterruptedIOException();
274 while (UTIL.getMiniHBaseCluster().countServedRegions() < totalRegions) {
275 // while (!cluster.getMaster().allRegionsAssigned()) {
276 LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are "
277 + UTIL.getMiniHBaseCluster().countServedRegions() + " right now.");
278 try {
279 Thread.sleep(200);
280 } catch (InterruptedException e) {
281 throw new InterruptedIOException();
284 UTIL.waitUntilNoRegionsInTransition();