HBASE-26416 Implement a new method for region replication instead of using replay...
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / TestZooKeeper.java
blob1acf0a9d71c4cd93bebde006fdba4ea2fb9a3939
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org.apache.hadoop.hbase;
20 import static org.junit.Assert.assertEquals;
21 import static org.junit.Assert.assertFalse;
22 import static org.junit.Assert.assertTrue;
24 import edu.umd.cs.findbugs.annotations.NonNull;
25 import java.util.List;
26 import java.util.Map;
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.hbase.client.Admin;
30 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
31 import org.apache.hadoop.hbase.client.Put;
32 import org.apache.hadoop.hbase.client.RegionInfo;
33 import org.apache.hadoop.hbase.client.ResultScanner;
34 import org.apache.hadoop.hbase.client.Scan;
35 import org.apache.hadoop.hbase.client.Table;
36 import org.apache.hadoop.hbase.client.TableDescriptor;
37 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
38 import org.apache.hadoop.hbase.coordination.ZkSplitLogWorkerCoordination;
39 import org.apache.hadoop.hbase.master.HMaster;
40 import org.apache.hadoop.hbase.master.LoadBalancer;
41 import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
42 import org.apache.hadoop.hbase.testclassification.MediumTests;
43 import org.apache.hadoop.hbase.testclassification.MiscTests;
44 import org.apache.hadoop.hbase.util.Bytes;
45 import org.apache.hadoop.hbase.util.CommonFSUtils;
46 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
47 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
48 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
49 import org.apache.zookeeper.KeeperException;
50 import org.junit.After;
51 import org.junit.AfterClass;
52 import org.junit.Before;
53 import org.junit.BeforeClass;
54 import org.junit.ClassRule;
55 import org.junit.Rule;
56 import org.junit.Test;
57 import org.junit.experimental.categories.Category;
58 import org.junit.rules.TestName;
59 import org.slf4j.Logger;
60 import org.slf4j.LoggerFactory;
62 @Category({MiscTests.class, MediumTests.class})
63 public class TestZooKeeper {
65 @ClassRule
66 public static final HBaseClassTestRule CLASS_RULE =
67 HBaseClassTestRule.forClass(TestZooKeeper.class);
69 private static final Logger LOG = LoggerFactory.getLogger(TestZooKeeper.class);
71 private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
73 @Rule
74 public TestName name = new TestName();
76 @BeforeClass
77 public static void setUpBeforeClass() throws Exception {
78 // Test we can first start the ZK cluster by itself
79 Configuration conf = TEST_UTIL.getConfiguration();
80 TEST_UTIL.startMiniDFSCluster(2);
81 TEST_UTIL.startMiniZKCluster();
82 conf.set(HConstants.CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY,
83 HConstants.ZK_CONNECTION_REGISTRY_CLASS);
84 conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000);
85 conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockLoadBalancer.class,
86 LoadBalancer.class);
87 TEST_UTIL.startMiniDFSCluster(2);
90 @AfterClass
91 public static void tearDownAfterClass() throws Exception {
92 TEST_UTIL.shutdownMiniCluster();
95 @Before
96 public void setUp() throws Exception {
97 StartTestingClusterOption option = StartTestingClusterOption.builder()
98 .numMasters(2).numRegionServers(2).build();
99 TEST_UTIL.startMiniHBaseCluster(option);
102 @After
103 public void after() throws Exception {
104 try {
105 TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(10000);
106 // Some regionserver could fail to delete its znode.
107 // So shutdown could hang. Let's kill them all instead.
108 TEST_UTIL.getHBaseCluster().killAll();
110 // Still need to clean things up
111 TEST_UTIL.shutdownMiniHBaseCluster();
112 } finally {
113 TEST_UTIL.getTestFileSystem().delete(CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration()),
114 true);
115 ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
119 @Test
120 public void testRegionServerSessionExpired() throws Exception {
121 LOG.info("Starting " + name.getMethodName());
122 TEST_UTIL.expireRegionServerSession(0);
123 testSanity(name.getMethodName());
126 @Test
127 public void testMasterSessionExpired() throws Exception {
128 LOG.info("Starting " + name.getMethodName());
129 TEST_UTIL.expireMasterSession();
130 testSanity(name.getMethodName());
134 * Master recovery when the znode already exists. Internally, this
135 * test differs from {@link #testMasterSessionExpired} because here
136 * the master znode will exist in ZK.
138 @Test
139 public void testMasterZKSessionRecoveryFailure() throws Exception {
140 LOG.info("Starting " + name.getMethodName());
141 SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
142 HMaster m = cluster.getMaster();
143 m.abort("Test recovery from zk session expired",
144 new KeeperException.SessionExpiredException());
145 assertTrue(m.isStopped()); // Master doesn't recover any more
146 testSanity(name.getMethodName());
150 * Make sure we can use the cluster
152 private void testSanity(final String testName) throws Exception {
153 String tableName = testName + "_" + EnvironmentEdgeManager.currentTime();
154 TableDescriptor desc = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName))
155 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build();
156 LOG.info("Creating table " + tableName);
157 Admin admin = TEST_UTIL.getAdmin();
158 try {
159 admin.createTable(desc);
160 } finally {
161 admin.close();
164 Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
165 Put put = new Put(Bytes.toBytes("testrow"));
166 put.addColumn(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
167 LOG.info("Putting table " + tableName);
168 table.put(put);
169 table.close();
173 * Tests that the master does not call retainAssignment after recovery from expired zookeeper
174 * session. Without the HBASE-6046 fix master always tries to assign all the user regions by
175 * calling retainAssignment.
177 @Test
178 public void testRegionAssignmentAfterMasterRecoveryDueToZKExpiry() throws Exception {
179 SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
180 cluster.startRegionServer();
181 cluster.waitForActiveAndReadyMaster(10000);
182 HMaster m = cluster.getMaster();
183 final ZKWatcher zkw = m.getZooKeeper();
184 // now the cluster is up. So assign some regions.
185 try (Admin admin = TEST_UTIL.getAdmin()) {
186 byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"),
187 Bytes.toBytes("c"), Bytes.toBytes("d"), Bytes.toBytes("e"), Bytes.toBytes("f"),
188 Bytes.toBytes("g"), Bytes.toBytes("h"), Bytes.toBytes("i"), Bytes.toBytes("j") };
189 TableDescriptor htd =
190 TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()))
191 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
192 admin.createTable(htd, SPLIT_KEYS);
193 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
194 m.getZooKeeper().close();
195 MockLoadBalancer.retainAssignCalled = false;
196 final int expectedNumOfListeners = countPermanentListeners(zkw);
197 m.abort("Test recovery from zk session expired",
198 new KeeperException.SessionExpiredException());
199 assertTrue(m.isStopped()); // Master doesn't recover any more
200 // The recovered master should not call retainAssignment, as it is not a
201 // clean startup.
202 assertFalse("Retain assignment should not be called", MockLoadBalancer.retainAssignCalled);
203 // number of listeners should be same as the value before master aborted
204 // wait for new master is initialized
205 cluster.waitForActiveAndReadyMaster(120000);
206 final HMaster newMaster = cluster.getMasterThread().getMaster();
207 assertEquals(expectedNumOfListeners, countPermanentListeners(newMaster.getZooKeeper()));
212 * Count listeners in zkw excluding listeners, that belongs to workers or other
213 * temporary processes.
215 private int countPermanentListeners(ZKWatcher watcher) {
216 return countListeners(watcher, ZkSplitLogWorkerCoordination.class);
220 * Count listeners in zkw excluding provided classes
222 private int countListeners(ZKWatcher watcher, Class<?>... exclude) {
223 int cnt = 0;
224 for (Object o : watcher.getListeners()) {
225 boolean skip = false;
226 for (Class<?> aClass : exclude) {
227 if (aClass.isAssignableFrom(o.getClass())) {
228 skip = true;
229 break;
232 if (!skip) {
233 cnt += 1;
236 return cnt;
240 * Tests whether the logs are split when master recovers from a expired zookeeper session and an
241 * RS goes down.
243 @Test
244 public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws Exception {
245 SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
246 cluster.startRegionServer();
247 TableName tableName = TableName.valueOf(name.getMethodName());
248 byte[] family = Bytes.toBytes("col");
249 try (Admin admin = TEST_UTIL.getAdmin()) {
250 byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("1"), Bytes.toBytes("2"),
251 Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") };
252 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
253 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
254 admin.createTable(htd, SPLIT_KEYS);
256 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
257 HMaster m = cluster.getMaster();
258 try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
259 int numberOfPuts;
260 for (numberOfPuts = 0; numberOfPuts < 6; numberOfPuts++) {
261 Put p = new Put(Bytes.toBytes(numberOfPuts));
262 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"),
263 Bytes.toBytes("value" + numberOfPuts));
264 table.put(p);
266 m.abort("Test recovery from zk session expired",
267 new KeeperException.SessionExpiredException());
268 assertTrue(m.isStopped()); // Master doesn't recover any more
269 cluster.killRegionServer(TEST_UTIL.getRSForFirstRegionInTable(tableName).getServerName());
270 // Without patch for HBASE-6046 this test case will always timeout
271 // with patch the test case should pass.
272 int numberOfRows = 0;
273 try (ResultScanner scanner = table.getScanner(new Scan())) {
274 while (scanner.next() != null) {
275 numberOfRows++;
278 assertEquals("Number of rows should be equal to number of puts.", numberOfPuts, numberOfRows);
282 static class MockLoadBalancer extends SimpleLoadBalancer {
283 static boolean retainAssignCalled = false;
285 @Override
286 @NonNull
287 public Map<ServerName, List<RegionInfo>> retainAssignment(
288 Map<RegionInfo, ServerName> regions, List<ServerName> servers) throws HBaseIOException {
289 retainAssignCalled = true;
290 return super.retainAssignment(regions, servers);