HBASE-24033 Add ut for loading the corrupt recovered hfiles (#1322)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / TestZooKeeper.java
blobcfbb9d2a6ccef15e84c59ad4a3ef36ac6a672d30
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase;
20 import static org.junit.Assert.assertEquals;
21 import static org.junit.Assert.assertFalse;
22 import static org.junit.Assert.assertTrue;
24 import java.util.List;
25 import java.util.Map;
26 import org.apache.hadoop.conf.Configuration;
27 import org.apache.hadoop.hbase.client.Admin;
28 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
29 import org.apache.hadoop.hbase.client.Put;
30 import org.apache.hadoop.hbase.client.RegionInfo;
31 import org.apache.hadoop.hbase.client.ResultScanner;
32 import org.apache.hadoop.hbase.client.Scan;
33 import org.apache.hadoop.hbase.client.Table;
34 import org.apache.hadoop.hbase.client.TableDescriptor;
35 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
36 import org.apache.hadoop.hbase.coordination.ZkSplitLogWorkerCoordination;
37 import org.apache.hadoop.hbase.master.HMaster;
38 import org.apache.hadoop.hbase.master.LoadBalancer;
39 import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
40 import org.apache.hadoop.hbase.testclassification.MediumTests;
41 import org.apache.hadoop.hbase.testclassification.MiscTests;
42 import org.apache.hadoop.hbase.util.Bytes;
43 import org.apache.hadoop.hbase.util.FSUtils;
44 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
45 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
46 import org.apache.zookeeper.KeeperException;
47 import org.junit.After;
48 import org.junit.AfterClass;
49 import org.junit.Before;
50 import org.junit.BeforeClass;
51 import org.junit.ClassRule;
52 import org.junit.Rule;
53 import org.junit.Test;
54 import org.junit.experimental.categories.Category;
55 import org.junit.rules.TestName;
56 import org.slf4j.Logger;
57 import org.slf4j.LoggerFactory;
59 @Category({MiscTests.class, MediumTests.class})
60 public class TestZooKeeper {
62 @ClassRule
63 public static final HBaseClassTestRule CLASS_RULE =
64 HBaseClassTestRule.forClass(TestZooKeeper.class);
66 private static final Logger LOG = LoggerFactory.getLogger(TestZooKeeper.class);
68 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
70 @Rule
71 public TestName name = new TestName();
73 @BeforeClass
74 public static void setUpBeforeClass() throws Exception {
75 // Test we can first start the ZK cluster by itself
76 Configuration conf = TEST_UTIL.getConfiguration();
77 TEST_UTIL.startMiniDFSCluster(2);
78 TEST_UTIL.startMiniZKCluster();
79 conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000);
80 conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockLoadBalancer.class,
81 LoadBalancer.class);
82 TEST_UTIL.startMiniDFSCluster(2);
85 @AfterClass
86 public static void tearDownAfterClass() throws Exception {
87 TEST_UTIL.shutdownMiniCluster();
90 @Before
91 public void setUp() throws Exception {
92 StartMiniClusterOption option = StartMiniClusterOption.builder()
93 .numMasters(2).numRegionServers(2).build();
94 TEST_UTIL.startMiniHBaseCluster(option);
97 @After
98 public void after() throws Exception {
99 try {
100 TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(10000);
101 // Some regionserver could fail to delete its znode.
102 // So shutdown could hang. Let's kill them all instead.
103 TEST_UTIL.getHBaseCluster().killAll();
105 // Still need to clean things up
106 TEST_UTIL.shutdownMiniHBaseCluster();
107 } finally {
108 TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
109 ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
113 @Test
114 public void testRegionServerSessionExpired() throws Exception {
115 LOG.info("Starting " + name.getMethodName());
116 TEST_UTIL.expireRegionServerSession(0);
117 testSanity(name.getMethodName());
120 @Test
121 public void testMasterSessionExpired() throws Exception {
122 LOG.info("Starting " + name.getMethodName());
123 TEST_UTIL.expireMasterSession();
124 testSanity(name.getMethodName());
128 * Master recovery when the znode already exists. Internally, this
129 * test differs from {@link #testMasterSessionExpired} because here
130 * the master znode will exist in ZK.
132 @Test
133 public void testMasterZKSessionRecoveryFailure() throws Exception {
134 LOG.info("Starting " + name.getMethodName());
135 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
136 HMaster m = cluster.getMaster();
137 m.abort("Test recovery from zk session expired",
138 new KeeperException.SessionExpiredException());
139 assertTrue(m.isStopped()); // Master doesn't recover any more
140 testSanity(name.getMethodName());
144 * Make sure we can use the cluster
146 private void testSanity(final String testName) throws Exception {
147 String tableName = testName + "_" + System.currentTimeMillis();
148 TableDescriptor desc = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName))
149 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build();
150 LOG.info("Creating table " + tableName);
151 Admin admin = TEST_UTIL.getAdmin();
152 try {
153 admin.createTable(desc);
154 } finally {
155 admin.close();
158 Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
159 Put put = new Put(Bytes.toBytes("testrow"));
160 put.addColumn(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
161 LOG.info("Putting table " + tableName);
162 table.put(put);
163 table.close();
167 * Tests that the master does not call retainAssignment after recovery from expired zookeeper
168 * session. Without the HBASE-6046 fix master always tries to assign all the user regions by
169 * calling retainAssignment.
171 @Test
172 public void testRegionAssignmentAfterMasterRecoveryDueToZKExpiry() throws Exception {
173 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
174 cluster.startRegionServer();
175 cluster.waitForActiveAndReadyMaster(10000);
176 HMaster m = cluster.getMaster();
177 final ZKWatcher zkw = m.getZooKeeper();
178 // now the cluster is up. So assign some regions.
179 try (Admin admin = TEST_UTIL.getAdmin()) {
180 byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"),
181 Bytes.toBytes("c"), Bytes.toBytes("d"), Bytes.toBytes("e"), Bytes.toBytes("f"),
182 Bytes.toBytes("g"), Bytes.toBytes("h"), Bytes.toBytes("i"), Bytes.toBytes("j") };
183 TableDescriptor htd =
184 TableDescriptorBuilder.newBuilder(TableName.valueOf(name.getMethodName()))
185 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(HConstants.CATALOG_FAMILY)).build();
186 admin.createTable(htd, SPLIT_KEYS);
187 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
188 m.getZooKeeper().close();
189 MockLoadBalancer.retainAssignCalled = false;
190 final int expectedNumOfListeners = countPermanentListeners(zkw);
191 m.abort("Test recovery from zk session expired",
192 new KeeperException.SessionExpiredException());
193 assertTrue(m.isStopped()); // Master doesn't recover any more
194 // The recovered master should not call retainAssignment, as it is not a
195 // clean startup.
196 assertFalse("Retain assignment should not be called", MockLoadBalancer.retainAssignCalled);
197 // number of listeners should be same as the value before master aborted
198 // wait for new master is initialized
199 cluster.waitForActiveAndReadyMaster(120000);
200 final HMaster newMaster = cluster.getMasterThread().getMaster();
201 assertEquals(expectedNumOfListeners, countPermanentListeners(newMaster.getZooKeeper()));
206 * Count listeners in zkw excluding listeners, that belongs to workers or other
207 * temporary processes.
209 private int countPermanentListeners(ZKWatcher watcher) {
210 return countListeners(watcher, ZkSplitLogWorkerCoordination.class);
214 * Count listeners in zkw excluding provided classes
216 private int countListeners(ZKWatcher watcher, Class<?>... exclude) {
217 int cnt = 0;
218 for (Object o : watcher.getListeners()) {
219 boolean skip = false;
220 for (Class<?> aClass : exclude) {
221 if (aClass.isAssignableFrom(o.getClass())) {
222 skip = true;
223 break;
226 if (!skip) {
227 cnt += 1;
230 return cnt;
234 * Tests whether the logs are split when master recovers from a expired zookeeper session and an
235 * RS goes down.
237 @Test
238 public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws Exception {
239 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
240 cluster.startRegionServer();
241 TableName tableName = TableName.valueOf(name.getMethodName());
242 byte[] family = Bytes.toBytes("col");
243 try (Admin admin = TEST_UTIL.getAdmin()) {
244 byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("1"), Bytes.toBytes("2"),
245 Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") };
246 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
247 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
248 admin.createTable(htd, SPLIT_KEYS);
250 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
251 HMaster m = cluster.getMaster();
252 try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
253 int numberOfPuts;
254 for (numberOfPuts = 0; numberOfPuts < 6; numberOfPuts++) {
255 Put p = new Put(Bytes.toBytes(numberOfPuts));
256 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"),
257 Bytes.toBytes("value" + numberOfPuts));
258 table.put(p);
260 m.abort("Test recovery from zk session expired",
261 new KeeperException.SessionExpiredException());
262 assertTrue(m.isStopped()); // Master doesn't recover any more
263 cluster.killRegionServer(TEST_UTIL.getRSForFirstRegionInTable(tableName).getServerName());
264 // Without patch for HBASE-6046 this test case will always timeout
265 // with patch the test case should pass.
266 int numberOfRows = 0;
267 try (ResultScanner scanner = table.getScanner(new Scan())) {
268 while (scanner.next() != null) {
269 numberOfRows++;
272 assertEquals("Number of rows should be equal to number of puts.", numberOfPuts, numberOfRows);
276 static class MockLoadBalancer extends SimpleLoadBalancer {
277 static boolean retainAssignCalled = false;
279 @Override
280 public Map<ServerName, List<RegionInfo>> retainAssignment(
281 Map<RegionInfo, ServerName> regions, List<ServerName> servers) throws HBaseIOException {
282 retainAssignCalled = true;
283 return super.retainAssignment(regions, servers);