2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
;
20 import static org
.junit
.Assert
.assertEquals
;
21 import static org
.junit
.Assert
.assertTrue
;
22 import static org
.junit
.Assert
.fail
;
24 import java
.io
.IOException
;
25 import java
.io
.InterruptedIOException
;
26 import java
.util
.ArrayList
;
27 import java
.util
.Arrays
;
28 import java
.util
.Collection
;
29 import java
.util
.List
;
30 import org
.apache
.hadoop
.hbase
.client
.Admin
;
31 import org
.apache
.hadoop
.hbase
.client
.BalanceResponse
;
32 import org
.apache
.hadoop
.hbase
.client
.ColumnFamilyDescriptorBuilder
;
33 import org
.apache
.hadoop
.hbase
.client
.Connection
;
34 import org
.apache
.hadoop
.hbase
.client
.ConnectionFactory
;
35 import org
.apache
.hadoop
.hbase
.client
.RegionInfo
;
36 import org
.apache
.hadoop
.hbase
.client
.RegionLocator
;
37 import org
.apache
.hadoop
.hbase
.client
.TableDescriptor
;
38 import org
.apache
.hadoop
.hbase
.client
.TableDescriptorBuilder
;
39 import org
.apache
.hadoop
.hbase
.regionserver
.HRegionServer
;
40 import org
.apache
.hadoop
.hbase
.testclassification
.FlakeyTests
;
41 import org
.apache
.hadoop
.hbase
.testclassification
.LargeTests
;
42 import org
.apache
.hadoop
.hbase
.util
.Bytes
;
43 import org
.apache
.hadoop
.hbase
.util
.JVMClusterUtil
;
44 import org
.apache
.hadoop
.hbase
.util
.Threads
;
45 import org
.junit
.After
;
46 import org
.junit
.Before
;
47 import org
.junit
.ClassRule
;
48 import org
.junit
.Test
;
49 import org
.junit
.experimental
.categories
.Category
;
50 import org
.junit
.runner
.RunWith
;
51 import org
.junit
.runners
.Parameterized
;
52 import org
.junit
.runners
.Parameterized
.Parameters
;
53 import org
.slf4j
.Logger
;
54 import org
.slf4j
.LoggerFactory
;
56 import org
.apache
.hadoop
.hbase
.shaded
.protobuf
.ProtobufUtil
;
/**
 * Test whether region re-balancing works (HBASE-71).
 * The test only works for cluster-wide balancing, not per-table balancing.
 * The accepted margin is widened a little so that StochasticLoadBalancer results are acceptable.
 */
63 @Category({FlakeyTests
.class, LargeTests
.class})
64 @RunWith(value
= Parameterized
.class)
65 public class TestRegionRebalancing
{
68 public static final HBaseClassTestRule CLASS_RULE
=
69 HBaseClassTestRule
.forClass(TestRegionRebalancing
.class);
72 public static Collection
<Object
[]> data() {
73 Object
[][] balancers
=
74 new String
[][] { { "org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer" },
75 { "org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer" } };
76 return Arrays
.asList(balancers
);
79 private static final byte[] FAMILY_NAME
= Bytes
.toBytes("col");
80 private static final Logger LOG
= LoggerFactory
.getLogger(TestRegionRebalancing
.class);
81 private final HBaseTestingUtil UTIL
= new HBaseTestingUtil();
82 private RegionLocator regionLocator
;
83 private TableDescriptor tableDescriptor
;
84 private String balancerName
;
/**
 * Creates the test instance for one parameterized run.
 * @param balancerName load balancer class name that {@code before()} writes into the cluster
 *          configuration
 */
public TestRegionRebalancing(String balancerName) {
  this.balancerName = balancerName;
}
92 public void after() throws Exception
{
93 UTIL
.shutdownMiniCluster();
97 public void before() throws Exception
{
98 UTIL
.getConfiguration().set("hbase.master.loadbalancer.class", this.balancerName
);
99 // set minCostNeedBalance to 0, make sure balancer run
100 UTIL
.startMiniCluster(1);
102 this.tableDescriptor
= TableDescriptorBuilder
.newBuilder(TableName
.valueOf("test"))
103 .setColumnFamily(ColumnFamilyDescriptorBuilder
.of(FAMILY_NAME
)).build();
107 * For HBASE-71. Try a few different configurations of starting and stopping
108 * region servers to see if the assignment or regions is pretty balanced.
109 * @throws IOException
110 * @throws InterruptedException
113 public void testRebalanceOnRegionServerNumberChange()
114 throws IOException
, InterruptedException
{
115 try(Connection connection
= ConnectionFactory
.createConnection(UTIL
.getConfiguration());
116 Admin admin
= connection
.getAdmin()) {
117 admin
.createTable(this.tableDescriptor
, Arrays
.copyOfRange(HBaseTestingUtil
.KEYS
,
118 1, HBaseTestingUtil
.KEYS
.length
));
119 this.regionLocator
= connection
.getRegionLocator(this.tableDescriptor
.getTableName());
121 MetaTableAccessor
.fullScanMetaAndPrint(admin
.getConnection());
123 assertEquals("Test table should have right number of regions",
124 HBaseTestingUtil
.KEYS
.length
,
125 this.regionLocator
.getStartKeys().length
);
127 // verify that the region assignments are balanced to start out
128 assertRegionsAreBalanced();
130 // add a region server - total of 2
131 LOG
.info("Started second server=" +
132 UTIL
.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
133 UTIL
.getHBaseCluster().getMaster().balance();
134 assertRegionsAreBalanced();
136 // On a balanced cluster, calling balance() should return true
137 BalanceResponse response
= UTIL
.getHBaseCluster().getMaster().balance();
138 assertTrue(response
.isBalancerRan());
139 assertEquals(0, response
.getMovesCalculated());
140 assertEquals(0, response
.getMovesExecuted());
142 // if we add a server, then the balance() call should return true
143 // add a region server - total of 3
144 LOG
.info("Started third server=" +
145 UTIL
.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
146 waitForAllRegionsAssigned();
148 response
= UTIL
.getHBaseCluster().getMaster().balance();
149 assertTrue(response
.isBalancerRan());
150 assertTrue(response
.getMovesCalculated() > 0);
151 assertEquals(response
.getMovesCalculated(), response
.getMovesExecuted());
152 assertRegionsAreBalanced();
154 // kill a region server - total of 2
155 LOG
.info("Stopped third server=" + UTIL
.getHBaseCluster().stopRegionServer(2, false));
156 UTIL
.getHBaseCluster().waitOnRegionServer(2);
157 waitOnCrashProcessing();
158 UTIL
.getHBaseCluster().getMaster().balance();
159 assertRegionsAreBalanced();
161 // start two more region servers - total of 4
162 LOG
.info("Readding third server=" +
163 UTIL
.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
164 LOG
.info("Added fourth server=" +
165 UTIL
.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
166 waitOnCrashProcessing();
167 waitForAllRegionsAssigned();
169 response
= UTIL
.getHBaseCluster().getMaster().balance();
170 assertTrue(response
.isBalancerRan());
171 assertTrue(response
.getMovesCalculated() > 0);
172 assertEquals(response
.getMovesCalculated(), response
.getMovesExecuted());
174 assertRegionsAreBalanced();
175 for (int i
= 0; i
< 6; i
++){
176 LOG
.info("Adding " + (i
+ 5) + "th region server");
177 UTIL
.getHBaseCluster().startRegionServer();
179 waitForAllRegionsAssigned();
181 response
= UTIL
.getHBaseCluster().getMaster().balance();
182 assertTrue(response
.isBalancerRan());
183 assertTrue(response
.getMovesCalculated() > 0);
184 assertEquals(response
.getMovesCalculated(), response
.getMovesExecuted());
186 assertRegionsAreBalanced();
187 regionLocator
.close();
192 * Wait on crash processing. Balancer won't run if processing a crashed server.
194 private void waitOnCrashProcessing() throws IOException
{
195 while (UTIL
.getHBaseCluster().getMaster().getServerManager().areDeadServersInProgress()) {
196 LOG
.info("Waiting on processing of crashed server before proceeding...");
202 * Determine if regions are balanced. Figure out the total, divide by the
203 * number of online servers, then test if each server is +/- 1 of average
206 private void assertRegionsAreBalanced() throws IOException
{
207 // TODO: Fix this test. Old balancer used to run with 'slop'. New
208 // balancer does not.
209 boolean success
= false;
210 float slop
= (float)UTIL
.getConfiguration().getFloat("hbase.regions.slop", 0.1f
);
211 if (slop
<= 0) slop
= 1;
213 for (int i
= 0; i
< 5; i
++) {
215 // make sure all the regions are reassigned before we test balance
216 waitForAllRegionsAssigned();
218 long regionCount
= UTIL
.getMiniHBaseCluster().countServedRegions();
219 List
<HRegionServer
> servers
= getOnlineRegionServers();
220 double avg
= (double)regionCount
/ (double)servers
.size();
221 int avgLoadPlusSlop
= (int)Math
.ceil(avg
* (1 + slop
));
222 int avgLoadMinusSlop
= (int)Math
.floor(avg
* (1 - slop
)) - 1;
223 // Increase the margin a little to accommodate StochasticLoadBalancer
224 if (this.balancerName
.contains("StochasticLoadBalancer")) {
228 LOG
.debug("There are " + servers
.size() + " servers and " + regionCount
229 + " regions. Load Average: " + avg
+ " low border: " + avgLoadMinusSlop
230 + ", up border: " + avgLoadPlusSlop
+ "; attempt: " + i
);
232 for (HRegionServer server
: servers
) {
234 ProtobufUtil
.getOnlineRegions(server
.getRSRpcServices()).size();
235 LOG
.debug(server
.getServerName() + " Avg: " + avg
+ " actual: " + serverLoad
);
236 if (!(avg
> 2.0 && serverLoad
<= avgLoadPlusSlop
237 && serverLoad
>= avgLoadMinusSlop
)) {
238 for (RegionInfo hri
:
239 ProtobufUtil
.getOnlineRegions(server
.getRSRpcServices())) {
240 if (hri
.isMetaRegion()) serverLoad
--;
241 // LOG.debug(hri.getRegionNameAsString());
243 if (!(serverLoad
<= avgLoadPlusSlop
&& serverLoad
>= avgLoadMinusSlop
)) {
244 LOG
.debug(server
.getServerName() + " Isn't balanced!!! Avg: " + avg
+
245 " actual: " + serverLoad
+ " slop: " + slop
);
253 // one or more servers are not balanced. sleep a little to give it a
254 // chance to catch up. then, go back to the retry loop.
257 } catch (InterruptedException e
) {}
259 UTIL
.getHBaseCluster().getMaster().balance();
263 // if we get here, all servers were balanced, so we should just return.
266 // if we get here, we tried 5 times and never got to short circuit out of
267 // the retry loop, so this is a failure.
268 fail("After 5 attempts, region assignments were not balanced.");
271 private List
<HRegionServer
> getOnlineRegionServers() {
272 List
<HRegionServer
> list
= new ArrayList
<>();
273 for (JVMClusterUtil
.RegionServerThread rst
:
274 UTIL
.getHBaseCluster().getRegionServerThreads()) {
275 if (rst
.getRegionServer().isOnline()) {
276 list
.add(rst
.getRegionServer());
283 * Wait until all the regions are assigned.
285 private void waitForAllRegionsAssigned() throws IOException
{
286 int totalRegions
= HBaseTestingUtil
.KEYS
.length
;
289 } catch (InterruptedException e
) {
290 throw new InterruptedIOException();
292 while (UTIL
.getMiniHBaseCluster().countServedRegions() < totalRegions
) {
293 // while (!cluster.getMaster().allRegionsAssigned()) {
294 LOG
.debug("Waiting for there to be "+ totalRegions
+" regions, but there are "
295 + UTIL
.getMiniHBaseCluster().countServedRegions() + " right now.");
298 } catch (InterruptedException e
) {
299 throw new InterruptedIOException();
302 UTIL
.waitUntilNoRegionsInTransition();