HBASE-24033 Add ut for loading the corrupt recovered hfiles (#1322)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / wal / TestWALOpenAfterDNRollingStart.java
blob7d7896c3f999570b24b0a79d5e52c0d9bd0e270e
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase.wal;
20 import java.io.IOException;
21 import java.util.Arrays;
22 import java.util.List;
23 import org.apache.hadoop.fs.Path;
24 import org.apache.hadoop.hbase.HBaseClassTestRule;
25 import org.apache.hadoop.hbase.HBaseTestingUtility;
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.regionserver.HRegionServer;
28 import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
29 import org.apache.hadoop.hbase.testclassification.LargeTests;
30 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
31 import org.apache.hadoop.hbase.util.FSUtils;
32 import org.junit.After;
33 import org.junit.AfterClass;
34 import org.junit.Before;
35 import org.junit.BeforeClass;
36 import org.junit.ClassRule;
37 import org.junit.Test;
38 import org.junit.experimental.categories.Category;
39 import org.junit.runner.RunWith;
40 import org.junit.runners.Parameterized;
41 import org.junit.runners.Parameterized.Parameter;
42 import org.junit.runners.Parameterized.Parameters;
44 @RunWith(Parameterized.class)
45 @Category({ RegionServerTests.class, LargeTests.class })
46 public class TestWALOpenAfterDNRollingStart {
48 @ClassRule
49 public static final HBaseClassTestRule CLASS_RULE =
50 HBaseClassTestRule.forClass(TestWALOpenAfterDNRollingStart.class);
52 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
53 // Sleep time before restart next dn, we need to wait the current dn to finish start up
54 private static long DN_RESTART_INTERVAL = 15000;
56 // interval of checking low replication. The sleep time must smaller than
57 // DataNodeRestartInterval
58 // so a low replication case will be detected and the wal will be rolled
59 private static long CHECK_LOW_REPLICATION_INTERVAL = 10000;
61 @Parameter
62 public String walProvider;
64 @Parameters(name = "{index}: wal={0}")
65 public static List<Object[]> data() {
66 return Arrays.asList(new Object[] { "asyncfs" }, new Object[] { "filesystem" });
69 @BeforeClass
70 public static void setUpBeforeClass() throws Exception {
71 // don't let hdfs client to choose a new replica when dn down
72 TEST_UTIL.getConfiguration()
73 .setBoolean("dfs.client.block.write.replace-datanode-on-failure.enable", false);
74 TEST_UTIL.getConfiguration().setLong("hbase.regionserver.hlog.check.lowreplication.interval",
75 CHECK_LOW_REPLICATION_INTERVAL);
76 TEST_UTIL.startMiniDFSCluster(3);
77 TEST_UTIL.startMiniZKCluster();
80 @Before
81 public void setUp() throws IOException, InterruptedException {
82 TEST_UTIL.getConfiguration().set("hbase.wal.provider", walProvider);
83 TEST_UTIL.startMiniHBaseCluster();
86 @After
87 public void tearDown() throws Exception {
88 TEST_UTIL.shutdownMiniHBaseCluster();
91 @AfterClass
92 public static void tearDownAfterClass() throws Exception {
93 TEST_UTIL.shutdownMiniCluster();
96 /**
97 * See HBASE-18132. This is a test case for failing to open a WAL (for replication, for example)
98 * after all datanodes have been restarted (a rolling upgrade, for example). Before this patch,
99 * low replication detection was only used when syncing the WAL. But if the WAL has not had any
100 * entry written, it will never know that all the replicas of the WAL are broken (because of the
101 * datanodes restarting), and this WAL can never be opened.
102 * @throws Exception
104 @Test
105 public void test() throws Exception {
106 HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0);
107 AbstractFSWAL<?> wal = (AbstractFSWAL<?>) server.getWAL(null);
108 Path currentFile = wal.getCurrentFileName();
109 // restart every dn to simulate a dn rolling upgrade
110 for (int i = 0, n = TEST_UTIL.getDFSCluster().getDataNodes().size(); i < n; i++) {
111 // This is NOT a bug, when restart dn in miniDFSCluster, it will remove the stopped dn from
112 // the dn list and then add to the tail of this list, we need to always restart the first one
113 // to simulate rolling upgrade of every dn.
114 TEST_UTIL.getDFSCluster().restartDataNode(0);
115 // sleep enough time so log roller can detect the pipeline break and roll log
116 Thread.sleep(DN_RESTART_INTERVAL);
119 if (!server.getFileSystem().exists(currentFile)) {
120 Path walRootDir = FSUtils.getWALRootDir(TEST_UTIL.getConfiguration());
121 final Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
122 currentFile = new Path(oldLogDir, currentFile.getName());
124 // if the log is not rolled, then we can never open this wal forever.
125 try (WAL.Reader reader = WALFactory.createReader(TEST_UTIL.getTestFileSystem(), currentFile,
126 TEST_UTIL.getConfiguration())) {
127 reader.next();