HBASE-23949 refactor loadBalancer implements for rsgroup balance by table to achieve...
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / rsgroup / TestRSGroupsKillRS.java
blob b4b62cef435ddc37cd9f187c156e018579a77a66
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase.rsgroup;
20 import static org.apache.hadoop.hbase.util.Threads.sleep;
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertTrue;
24 import java.lang.reflect.Field;
25 import java.lang.reflect.Modifier;
26 import java.util.ArrayList;
27 import java.util.HashSet;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.Set;
31 import org.apache.hadoop.hbase.HBaseClassTestRule;
32 import org.apache.hadoop.hbase.NamespaceDescriptor;
33 import org.apache.hadoop.hbase.ServerName;
34 import org.apache.hadoop.hbase.TableName;
35 import org.apache.hadoop.hbase.Version;
36 import org.apache.hadoop.hbase.Waiter;
37 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
38 import org.apache.hadoop.hbase.client.RegionInfo;
39 import org.apache.hadoop.hbase.client.Table;
40 import org.apache.hadoop.hbase.client.TableDescriptor;
41 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
42 import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
43 import org.apache.hadoop.hbase.net.Address;
44 import org.apache.hadoop.hbase.testclassification.MediumTests;
45 import org.apache.hadoop.hbase.testclassification.RSGroupTests;
46 import org.apache.hadoop.hbase.util.Bytes;
47 import org.apache.hadoop.hbase.util.JVMClusterUtil;
48 import org.apache.hadoop.hbase.util.VersionInfo;
49 import org.junit.After;
50 import org.junit.AfterClass;
51 import org.junit.Before;
52 import org.junit.BeforeClass;
53 import org.junit.ClassRule;
54 import org.junit.Test;
55 import org.junit.experimental.categories.Category;
56 import org.slf4j.Logger;
57 import org.slf4j.LoggerFactory;
59 import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
61 @Category({ RSGroupTests.class, MediumTests.class })
62 public class TestRSGroupsKillRS extends TestRSGroupsBase {
64 @ClassRule
65 public static final HBaseClassTestRule CLASS_RULE =
66 HBaseClassTestRule.forClass(TestRSGroupsKillRS.class);
68 private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);
70 @BeforeClass
71 public static void setUp() throws Exception {
72 setUpTestBeforeClass();
75 @AfterClass
76 public static void tearDown() throws Exception {
77 tearDownAfterClass();
80 @Before
81 public void beforeMethod() throws Exception {
82 setUpBeforeMethod();
85 @After
86 public void afterMethod() throws Exception {
87 tearDownAfterMethod();
90 @Test
91 public void testKillRS() throws Exception {
92 RSGroupInfo appInfo = addGroup("appInfo", 1);
93 final TableName tableName =
94 TableName.valueOf(TABLE_PREFIX + "_ns", getNameWithoutIndex(name.getMethodName()));
95 ADMIN.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
96 .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
97 final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
98 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
99 ADMIN.createTable(desc);
100 // wait for created table to be assigned
101 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
102 @Override
103 public boolean evaluate() throws Exception {
104 return getTableRegionMap().get(desc.getTableName()) != null;
108 ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
109 assertEquals(1, ADMIN.getRegions(targetServer).size());
111 try {
112 // stopping may cause an exception
113 // due to the connection loss
114 ADMIN.stopRegionServer(targetServer.getAddress().toString());
115 } catch (Exception e) {
117 // wait until the server is actually down
118 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
119 @Override
120 public boolean evaluate() throws Exception {
121 return !CLUSTER.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer);
124 // there is only one rs in the group and we killed it, so the region can not be online, until
125 // later we add new servers to it.
126 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
127 @Override
128 public boolean evaluate() throws Exception {
129 return !CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
132 Set<Address> newServers = Sets.newHashSet();
133 newServers.add(ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
134 ADMIN.moveServersToRSGroup(newServers, appInfo.getName());
136 // Make sure all the table's regions get reassigned
137 // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
138 ADMIN.disableTable(tableName);
139 ADMIN.enableTable(tableName);
141 // wait for region to be assigned
142 TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
143 @Override
144 public boolean evaluate() throws Exception {
145 return CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
149 ServerName targetServer1 = getServerName(newServers.iterator().next());
150 assertEquals(1, ADMIN.getRegions(targetServer1).size());
151 assertEquals(tableName, ADMIN.getRegions(targetServer1).get(0).getTable());
154 @Test
155 public void testKillAllRSInGroup() throws Exception {
156 // create a rsgroup and move two regionservers to it
157 String groupName = "my_group";
158 int groupRSCount = 2;
159 addGroup(groupName, groupRSCount);
161 // create a table, and move it to my_group
162 Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5);
163 TEST_UTIL.loadTable(t, Bytes.toBytes("f"));
164 Set<TableName> toAddTables = new HashSet<>();
165 toAddTables.add(tableName);
166 ADMIN.setRSGroup(toAddTables, groupName);
167 assertTrue(
168 ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond().contains(tableName));
169 TEST_UTIL.waitTableAvailable(tableName, 30000);
171 // check my_group servers and table regions
172 Set<Address> servers = ADMIN.getRSGroup(groupName).getServers();
173 assertEquals(2, servers.size());
174 LOG.debug("group servers {}", servers);
175 for (RegionInfo tr : MASTER.getAssignmentManager().getRegionStates()
176 .getRegionsOfTable(tableName)) {
177 assertTrue(servers.contains(MASTER.getAssignmentManager().getRegionStates()
178 .getRegionAssignments().get(tr).getAddress()));
181 // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
182 // ('lastHost' of other regions are in 'default' group)
183 // and check if all table regions are online
184 List<ServerName> gsn = new ArrayList<>();
185 for (Address addr : servers) {
186 gsn.add(getServerName(addr));
188 assertEquals(2, gsn.size());
189 for (Map.Entry<RegionInfo, ServerName> entry : MASTER.getAssignmentManager().getRegionStates()
190 .getRegionAssignments().entrySet()) {
191 if (entry.getKey().getTable().equals(tableName)) {
192 LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
193 entry.getValue(), gsn.get(1 - gsn.indexOf(entry.getValue())));
194 TEST_UTIL.moveRegionAndWait(entry.getKey(), gsn.get(1 - gsn.indexOf(entry.getValue())));
195 break;
198 TEST_UTIL.waitTableAvailable(tableName, 30000);
200 // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
201 // and then check if all table regions are online
202 for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
203 TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
205 // better wait for a while for region reassign
206 sleep(10000);
207 assertEquals(NUM_SLAVES_BASE - gsn.size(),
208 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
209 TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
210 gsn.get(0).getPort());
211 assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
212 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
213 TEST_UTIL.waitTableAvailable(tableName, 30000);
215 // case 2: stop all the regionservers in my_group, and move another
216 // regionserver(from the 'default' group) to my_group,
217 // and then check if all table regions are online
218 for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster()
219 .getLiveRegionServerThreads()) {
220 if (rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())) {
221 TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName());
222 break;
225 sleep(10000);
226 assertEquals(NUM_SLAVES_BASE - gsn.size(),
227 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
228 ServerName newServer = MASTER.getServerManager().getOnlineServersList().get(0);
229 ADMIN.moveServersToRSGroup(Sets.newHashSet(newServer.getAddress()), groupName);
230 // wait and check if table regions are online
231 TEST_UTIL.waitTableAvailable(tableName, 30000);
234 @Test
235 public void testLowerMetaGroupVersion() throws Exception {
236 // create a rsgroup and move one regionserver to it
237 String groupName = "meta_group";
238 int groupRSCount = 1;
239 addGroup(groupName, groupRSCount);
241 // move hbase:meta to meta_group
242 Set<TableName> toAddTables = new HashSet<>();
243 toAddTables.add(TableName.META_TABLE_NAME);
244 ADMIN.setRSGroup(toAddTables, groupName);
245 assertTrue(ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond()
246 .contains(TableName.META_TABLE_NAME));
248 // restart the regionserver in meta_group, and lower its version
249 String originVersion = "";
250 Set<Address> servers = new HashSet<>();
251 for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
252 servers.add(addr);
253 TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
254 originVersion = MASTER.getRegionServerVersion(getServerName(addr));
256 // better wait for a while for region reassign
257 sleep(10000);
258 assertEquals(NUM_SLAVES_BASE - groupRSCount,
259 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
260 Address address = servers.iterator().next();
261 int majorVersion = VersionInfo.getMajorVersion(originVersion);
262 assertTrue(majorVersion >= 1);
263 String lowerVersion = String.valueOf(majorVersion - 1) + originVersion.split("\\.")[1];
264 setFinalStatic(Version.class.getField("version"), lowerVersion);
265 TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostname(), address.getPort());
266 assertEquals(NUM_SLAVES_BASE,
267 TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
268 assertTrue(VersionInfo.compareVersion(originVersion,
269 MASTER.getRegionServerVersion(getServerName(servers.iterator().next()))) > 0);
270 LOG.debug("wait for META assigned...");
271 // SCP finished, which means all regions assigned too.
272 TEST_UTIL.waitFor(60000, () -> !TEST_UTIL.getHBaseCluster().getMaster().getProcedures().stream()
273 .filter(p -> (p instanceof ServerCrashProcedure)).findAny().isPresent());
276 private static void setFinalStatic(Field field, Object newValue) throws Exception {
277 field.setAccessible(true);
278 Field modifiersField = Field.class.getDeclaredField("modifiers");
279 modifiersField.setAccessible(true);
280 modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
281 field.set(null, newValue);