2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.FlakeyTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
/**
 * Test whether region re-balancing works (HBASE-71).
 * The test only covers cluster-wide balancing, not per-table balancing.
 * The accepted margin is widened a little so that StochasticLoadBalancer results pass.
 */
60 @Category({FlakeyTests
.class, LargeTests
.class})
61 @RunWith(value
= Parameterized
.class)
62 public class TestRegionRebalancing
{
65 public static final HBaseClassTestRule CLASS_RULE
=
66 HBaseClassTestRule
.forClass(TestRegionRebalancing
.class);
69 public static Collection
<Object
[]> data() {
70 Object
[][] balancers
=
71 new String
[][] { { "org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer" },
72 { "org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer" } };
73 return Arrays
.asList(balancers
);
76 private static final byte[] FAMILY_NAME
= Bytes
.toBytes("col");
77 private static final Logger LOG
= LoggerFactory
.getLogger(TestRegionRebalancing
.class);
78 private final HBaseTestingUtility UTIL
= new HBaseTestingUtility();
79 private RegionLocator regionLocator
;
80 private TableDescriptorBuilder
.ModifyableTableDescriptor tableDescriptor
;
81 private String balancerName
;
/**
 * Creates one test instance per balancer implementation.
 *
 * @param balancerName fully qualified class name of the load balancer to configure
 */
public TestRegionRebalancing(String balancerName) {
  this.balancerName = balancerName;
}
89 public void after() throws Exception
{
90 UTIL
.shutdownMiniCluster();
94 public void before() throws Exception
{
95 UTIL
.getConfiguration().set("hbase.master.loadbalancer.class", this.balancerName
);
96 // set minCostNeedBalance to 0, make sure balancer run
97 UTIL
.startMiniCluster(1);
99 this.tableDescriptor
= new TableDescriptorBuilder
.ModifyableTableDescriptor(
100 TableName
.valueOf("test"));
101 this.tableDescriptor
.setColumnFamily(
102 new ColumnFamilyDescriptorBuilder
.ModifyableColumnFamilyDescriptor(FAMILY_NAME
));
106 * For HBASE-71. Try a few different configurations of starting and stopping
107 * region servers to see if the assignment or regions is pretty balanced.
108 * @throws IOException
109 * @throws InterruptedException
112 public void testRebalanceOnRegionServerNumberChange()
113 throws IOException
, InterruptedException
{
114 try(Connection connection
= ConnectionFactory
.createConnection(UTIL
.getConfiguration());
115 Admin admin
= connection
.getAdmin()) {
116 admin
.createTable(this.tableDescriptor
, Arrays
.copyOfRange(HBaseTestingUtility
.KEYS
,
117 1, HBaseTestingUtility
.KEYS
.length
));
118 this.regionLocator
= connection
.getRegionLocator(this.tableDescriptor
.getTableName());
120 MetaTableAccessor
.fullScanMetaAndPrint(admin
.getConnection());
122 assertEquals("Test table should have right number of regions",
123 HBaseTestingUtility
.KEYS
.length
,
124 this.regionLocator
.getStartKeys().length
);
126 // verify that the region assignments are balanced to start out
127 assertRegionsAreBalanced();
129 // add a region server - total of 2
130 LOG
.info("Started second server=" +
131 UTIL
.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
132 UTIL
.getHBaseCluster().getMaster().balance();
133 assertRegionsAreBalanced();
135 // On a balanced cluster, calling balance() should return true
136 assert(UTIL
.getHBaseCluster().getMaster().balance() == true);
138 // if we add a server, then the balance() call should return true
139 // add a region server - total of 3
140 LOG
.info("Started third server=" +
141 UTIL
.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
142 waitForAllRegionsAssigned();
143 assert(UTIL
.getHBaseCluster().getMaster().balance() == true);
144 assertRegionsAreBalanced();
146 // kill a region server - total of 2
147 LOG
.info("Stopped third server=" + UTIL
.getHBaseCluster().stopRegionServer(2, false));
148 UTIL
.getHBaseCluster().waitOnRegionServer(2);
149 waitOnCrashProcessing();
150 UTIL
.getHBaseCluster().getMaster().balance();
151 assertRegionsAreBalanced();
153 // start two more region servers - total of 4
154 LOG
.info("Readding third server=" +
155 UTIL
.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
156 LOG
.info("Added fourth server=" +
157 UTIL
.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
158 waitOnCrashProcessing();
159 waitForAllRegionsAssigned();
160 assert(UTIL
.getHBaseCluster().getMaster().balance() == true);
161 assertRegionsAreBalanced();
162 for (int i
= 0; i
< 6; i
++){
163 LOG
.info("Adding " + (i
+ 5) + "th region server");
164 UTIL
.getHBaseCluster().startRegionServer();
166 waitForAllRegionsAssigned();
167 assert(UTIL
.getHBaseCluster().getMaster().balance() == true);
168 assertRegionsAreBalanced();
169 regionLocator
.close();
174 * Wait on crash processing. Balancer won't run if processing a crashed server.
176 private void waitOnCrashProcessing() {
177 while (UTIL
.getHBaseCluster().getMaster().getServerManager().areDeadServersInProgress()) {
178 LOG
.info("Waiting on processing of crashed server before proceeding...");
184 * Determine if regions are balanced. Figure out the total, divide by the
185 * number of online servers, then test if each server is +/- 1 of average
188 private void assertRegionsAreBalanced() throws IOException
{
189 // TODO: Fix this test. Old balancer used to run with 'slop'. New
190 // balancer does not.
191 boolean success
= false;
192 float slop
= (float)UTIL
.getConfiguration().getFloat("hbase.regions.slop", 0.1f
);
193 if (slop
<= 0) slop
= 1;
195 for (int i
= 0; i
< 5; i
++) {
197 // make sure all the regions are reassigned before we test balance
198 waitForAllRegionsAssigned();
200 long regionCount
= UTIL
.getMiniHBaseCluster().countServedRegions();
201 List
<HRegionServer
> servers
= getOnlineRegionServers();
202 double avg
= (double)regionCount
/ (double)servers
.size();
203 int avgLoadPlusSlop
= (int)Math
.ceil(avg
* (1 + slop
));
204 int avgLoadMinusSlop
= (int)Math
.floor(avg
* (1 - slop
)) - 1;
205 // Increase the margin a little to accommodate StochasticLoadBalancer
206 if (this.balancerName
.contains("StochasticLoadBalancer")) {
210 LOG
.debug("There are " + servers
.size() + " servers and " + regionCount
211 + " regions. Load Average: " + avg
+ " low border: " + avgLoadMinusSlop
212 + ", up border: " + avgLoadPlusSlop
+ "; attempt: " + i
);
214 for (HRegionServer server
: servers
) {
216 ProtobufUtil
.getOnlineRegions(server
.getRSRpcServices()).size();
217 LOG
.debug(server
.getServerName() + " Avg: " + avg
+ " actual: " + serverLoad
);
218 if (!(avg
> 2.0 && serverLoad
<= avgLoadPlusSlop
219 && serverLoad
>= avgLoadMinusSlop
)) {
220 for (RegionInfo hri
:
221 ProtobufUtil
.getOnlineRegions(server
.getRSRpcServices())) {
222 if (hri
.isMetaRegion()) serverLoad
--;
223 // LOG.debug(hri.getRegionNameAsString());
225 if (!(serverLoad
<= avgLoadPlusSlop
&& serverLoad
>= avgLoadMinusSlop
)) {
226 LOG
.debug(server
.getServerName() + " Isn't balanced!!! Avg: " + avg
+
227 " actual: " + serverLoad
+ " slop: " + slop
);
235 // one or more servers are not balanced. sleep a little to give it a
236 // chance to catch up. then, go back to the retry loop.
239 } catch (InterruptedException e
) {}
241 UTIL
.getHBaseCluster().getMaster().balance();
245 // if we get here, all servers were balanced, so we should just return.
248 // if we get here, we tried 5 times and never got to short circuit out of
249 // the retry loop, so this is a failure.
250 fail("After 5 attempts, region assignments were not balanced.");
253 private List
<HRegionServer
> getOnlineRegionServers() {
254 List
<HRegionServer
> list
= new ArrayList
<>();
255 for (JVMClusterUtil
.RegionServerThread rst
:
256 UTIL
.getHBaseCluster().getRegionServerThreads()) {
257 if (rst
.getRegionServer().isOnline()) {
258 list
.add(rst
.getRegionServer());
265 * Wait until all the regions are assigned.
267 private void waitForAllRegionsAssigned() throws IOException
{
268 int totalRegions
= HBaseTestingUtility
.KEYS
.length
;
271 } catch (InterruptedException e
) {
272 throw new InterruptedIOException();
274 while (UTIL
.getMiniHBaseCluster().countServedRegions() < totalRegions
) {
275 // while (!cluster.getMaster().allRegionsAssigned()) {
276 LOG
.debug("Waiting for there to be "+ totalRegions
+" regions, but there are "
277 + UTIL
.getMiniHBaseCluster().countServedRegions() + " right now.");
280 } catch (InterruptedException e
) {
281 throw new InterruptedIOException();
284 UTIL
.waitUntilNoRegionsInTransition();