HBASE-24033 Add ut for loading the corrupt recovered hfiles (#1322)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / regionserver / TestCompactionInDeadRegionServer.java
blob78042cc976fdc7d3400404e0f4458e27208835bc
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org.apache.hadoop.hbase.regionserver;
20 import static org.junit.Assert.assertEquals;
21 import static org.junit.Assert.fail;
23 import java.io.IOException;
24 import java.util.Arrays;
25 import java.util.List;
26 import org.apache.hadoop.conf.Configuration;
27 import org.apache.hadoop.hbase.HBaseClassTestRule;
28 import org.apache.hadoop.hbase.HBaseTestingUtility;
29 import org.apache.hadoop.hbase.HConstants;
30 import org.apache.hadoop.hbase.TableName;
31 import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
32 import org.apache.hadoop.hbase.YouAreDeadException;
33 import org.apache.hadoop.hbase.client.Get;
34 import org.apache.hadoop.hbase.client.Put;
35 import org.apache.hadoop.hbase.client.Table;
36 import org.apache.hadoop.hbase.testclassification.LargeTests;
37 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
38 import org.apache.hadoop.hbase.util.Bytes;
39 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
40 import org.apache.hadoop.hbase.wal.AsyncFSWALProvider;
41 import org.apache.hadoop.hbase.wal.FSHLogProvider;
42 import org.apache.hadoop.hbase.wal.WALFactory;
43 import org.apache.hadoop.hbase.wal.WALProvider;
44 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
45 import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
46 import org.junit.After;
47 import org.junit.Before;
48 import org.junit.ClassRule;
49 import org.junit.Test;
50 import org.junit.experimental.categories.Category;
51 import org.junit.runner.RunWith;
52 import org.junit.runners.Parameterized;
53 import org.junit.runners.Parameterized.Parameter;
54 import org.junit.runners.Parameterized.Parameters;
55 import org.slf4j.Logger;
56 import org.slf4j.LoggerFactory;
58 /**
59 * This testcase is used to ensure that the compaction marker will fail a compaction if the RS is
60 * already dead. It can not eliminate FNFE when scanning but it does reduce the possibility a lot.
62 @RunWith(Parameterized.class)
63 @Category({ RegionServerTests.class, LargeTests.class })
64 public class TestCompactionInDeadRegionServer {
66 @ClassRule
67 public static final HBaseClassTestRule CLASS_RULE =
68 HBaseClassTestRule.forClass(TestCompactionInDeadRegionServer.class);
70 private static final Logger LOG = LoggerFactory.getLogger(TestCompactionInDeadRegionServer.class);
72 private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
74 private static final TableName TABLE_NAME = TableName.valueOf("test");
76 private static final byte[] CF = Bytes.toBytes("cf");
78 private static final byte[] CQ = Bytes.toBytes("cq");
80 public static final class IgnoreYouAreDeadRS extends HRegionServer {
82 public IgnoreYouAreDeadRS(Configuration conf) throws IOException, InterruptedException {
83 super(conf);
86 @Override
87 protected void tryRegionServerReport(long reportStartTime, long reportEndTime)
88 throws IOException {
89 try {
90 super.tryRegionServerReport(reportStartTime, reportEndTime);
91 } catch (YouAreDeadException e) {
92 // ignore, do not abort
97 @Parameter
98 public Class<? extends WALProvider> walProvider;
100 @Parameters(name = "{index}: wal={0}")
101 public static List<Object[]> params() {
102 return Arrays.asList(new Object[] { FSHLogProvider.class },
103 new Object[] { AsyncFSWALProvider.class });
106 @Before
107 public void setUp() throws Exception {
108 UTIL.getConfiguration().setClass(WALFactory.WAL_PROVIDER, walProvider, WALProvider.class);
109 UTIL.getConfiguration().setInt(HConstants.ZK_SESSION_TIMEOUT, 2000);
110 UTIL.getConfiguration().setClass(HConstants.REGION_SERVER_IMPL, IgnoreYouAreDeadRS.class,
111 HRegionServer.class);
112 UTIL.startMiniCluster(2);
113 Table table = UTIL.createTable(TABLE_NAME, CF);
114 for (int i = 0; i < 10; i++) {
115 table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
117 UTIL.getAdmin().flush(TABLE_NAME);
118 for (int i = 10; i < 20; i++) {
119 table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
121 UTIL.getAdmin().flush(TABLE_NAME);
124 @After
125 public void tearDown() throws Exception {
126 UTIL.shutdownMiniCluster();
129 @Test
130 public void test() throws Exception {
131 HRegionServer regionSvr = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
132 HRegion region = regionSvr.getRegions(TABLE_NAME).get(0);
133 String regName = region.getRegionInfo().getEncodedName();
134 List<HRegion> metaRegs = regionSvr.getRegions(TableName.META_TABLE_NAME);
135 if (metaRegs != null && !metaRegs.isEmpty()) {
136 LOG.info("meta is on the same server: " + regionSvr);
137 // when region is on same server as hbase:meta, reassigning meta would abort the server
138 // since WAL is broken.
139 // so the region is moved to a different server
140 HRegionServer otherRs = UTIL.getOtherRegionServer(regionSvr);
141 UTIL.moveRegionAndWait(region.getRegionInfo(), otherRs.getServerName());
142 LOG.info("Moved region: " + regName + " to " + otherRs.getServerName());
144 HRegionServer rsToSuspend = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
145 region = rsToSuspend.getRegions(TABLE_NAME).get(0);
147 ZKWatcher watcher = UTIL.getZooKeeperWatcher();
148 watcher.getRecoverableZooKeeper().delete(
149 ZNodePaths.joinZNode(watcher.getZNodePaths().rsZNode, rsToSuspend.getServerName().toString()),
150 -1);
151 LOG.info("suspending " + rsToSuspend);
152 UTIL.waitFor(60000, 1000, new ExplainingPredicate<Exception>() {
154 @Override
155 public boolean evaluate() throws Exception {
156 for (RegionServerThread thread : UTIL.getHBaseCluster().getRegionServerThreads()) {
157 HRegionServer rs = thread.getRegionServer();
158 if (rs != rsToSuspend) {
159 return !rs.getRegions(TABLE_NAME).isEmpty();
162 return false;
165 @Override
166 public String explainFailure() throws Exception {
167 return "The region for " + TABLE_NAME + " is still on " + rsToSuspend.getServerName();
170 try {
171 region.compact(true);
172 fail("Should fail as our wal file has already been closed, " +
173 "and walDir has also been renamed");
174 } catch (Exception e) {
175 LOG.debug("expected exception: ", e);
177 Table table = UTIL.getConnection().getTable(TABLE_NAME);
178 // should not hit FNFE
179 for (int i = 0; i < 20; i++) {
180 assertEquals(i, Bytes.toInt(table.get(new Get(Bytes.toBytes(i))).getValue(CF, CQ)));