HBASE-26700 The way we bypass broken track file is not enough in StoreFileListFile...
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / regionserver / TestFailedAppendAndSync.java
blob05e0f1f42eec96c9caddd4886dc0b9d89daa77b5
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase.regionserver;
20 import static org.junit.Assert.assertTrue;
21 import static org.junit.Assert.fail;
22 import static org.mockito.Mockito.mock;
23 import static org.mockito.Mockito.when;
25 import java.io.IOException;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.concurrent.atomic.AtomicLong;
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.hbase.DroppedSnapshotException;
34 import org.apache.hadoop.hbase.HBaseClassTestRule;
35 import org.apache.hadoop.hbase.HBaseTestingUtil;
36 import org.apache.hadoop.hbase.HConstants;
37 import org.apache.hadoop.hbase.Server;
38 import org.apache.hadoop.hbase.TableName;
39 import org.apache.hadoop.hbase.client.Durability;
40 import org.apache.hadoop.hbase.client.Put;
41 import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
42 import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
43 import org.apache.hadoop.hbase.testclassification.SmallTests;
44 import org.apache.hadoop.hbase.util.Bytes;
45 import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
46 import org.apache.hadoop.hbase.util.Pair;
47 import org.apache.hadoop.hbase.util.Threads;
48 import org.apache.hadoop.hbase.wal.WAL;
49 import org.apache.hadoop.hbase.wal.WALProvider.Writer;
50 import org.junit.After;
51 import org.junit.Before;
52 import org.junit.ClassRule;
53 import org.junit.Rule;
54 import org.junit.Test;
55 import org.junit.experimental.categories.Category;
56 import org.junit.rules.TestName;
57 import org.mockito.Mockito;
58 import org.mockito.exceptions.verification.WantedButNotInvoked;
59 import org.slf4j.Logger;
60 import org.slf4j.LoggerFactory;
62 /**
63 * Testing sync/append failures.
64 * Copied from TestHRegion.
66 @Category({SmallTests.class})
67 public class TestFailedAppendAndSync {
69 @ClassRule
70 public static final HBaseClassTestRule CLASS_RULE =
71 HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);
73 private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
74 @Rule public TestName name = new TestName();
76 private static final String COLUMN_FAMILY = "MyCF";
77 private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
79 HRegion region = null;
80 // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack)
81 private static HBaseTestingUtil TEST_UTIL;
82 public static Configuration CONF ;
83 private String dir;
85 // Test names
86 protected TableName tableName;
88 @Before
89 public void setup() throws IOException {
90 TEST_UTIL = new HBaseTestingUtil();
91 CONF = TEST_UTIL.getConfiguration();
92 // Disable block cache.
93 CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
94 dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
95 tableName = TableName.valueOf(name.getMethodName());
98 @After
99 public void tearDown() throws Exception {
100 EnvironmentEdgeManagerTestHelper.reset();
101 LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
102 TEST_UTIL.cleanupTestDir();
105 String getName() {
106 return name.getMethodName();
109 // Dodgy WAL. Will throw exceptions when flags set.
110 class DodgyFSLog extends FSHLog {
111 volatile boolean throwSyncException = false;
112 volatile boolean throwAppendException = false;
113 volatile boolean throwArchiveException = false;
115 final AtomicLong rolls = new AtomicLong(0);
117 public DodgyFSLog(FileSystem fs, Server server, Path root, String logDir, Configuration conf)
118 throws IOException {
119 super(fs, server, root, logDir, conf);
122 @Override
123 public Map<byte[], List<byte[]>> rollWriter(boolean force)
124 throws FailedLogCloseException, IOException {
125 Map<byte[], List<byte[]>> regions = super.rollWriter(force);
126 rolls.getAndIncrement();
127 return regions;
130 @Override
131 protected void archiveLogFile(Path p) throws IOException {
132 if (throwArchiveException) {
133 throw new IOException("throw archival exception");
137 @Override
138 protected void archive(Pair<Path, Long> localLogsToArchive) {
139 super.archive(localLogsToArchive);
142 @Override
143 protected Writer createWriterInstance(Path path) throws IOException {
144 final Writer w = super.createWriterInstance(path);
145 return new Writer() {
146 @Override
147 public void close() throws IOException {
148 w.close();
151 @Override
152 public void sync(boolean forceSync) throws IOException {
153 if (throwSyncException) {
154 throw new IOException("FAKE! Failed to replace a bad datanode...");
156 w.sync(forceSync);
159 @Override
160 public void append(Entry entry) throws IOException {
161 if (throwAppendException) {
162 throw new IOException("FAKE! Failed to replace a bad datanode...");
164 w.append(entry);
167 @Override
168 public long getLength() {
169 return w.getLength();
172 @Override
173 public long getSyncedLength() {
174 return w.getSyncedLength();
180 * Reproduce locking up that happens when we get an exceptions appending and syncing.
181 * See HBASE-14317.
182 * First I need to set up some mocks for Server and RegionServerServices. I also need to
183 * set up a dodgy WAL that will throw an exception when we go to append to it.
185 @Test
186 public void testLockupAroundBadAssignSync() throws IOException {
187 // Make up mocked server and services.
188 RegionServerServices services = mock(RegionServerServices.class);
189 when(services.getConfiguration()).thenReturn(CONF);
190 when(services.isStopped()).thenReturn(false);
191 when(services.isAborted()).thenReturn(false);
192 // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
193 // the test.
194 FileSystem fs = FileSystem.get(CONF);
195 Path rootDir = new Path(dir + getName());
196 DodgyFSLog dodgyWAL = new DodgyFSLog(fs, (Server)services, rootDir, getName(), CONF);
197 dodgyWAL.init();
198 LogRoller logRoller = new LogRoller(services);
199 logRoller.addWAL(dodgyWAL);
200 logRoller.start();
202 boolean threwOnSync = false;
203 boolean threwOnAppend = false;
204 boolean threwOnBoth = false;
206 HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
207 try {
208 // Get some random bytes.
209 byte[] value = Bytes.toBytes(getName());
210 try {
211 // First get something into memstore
212 Put put = new Put(value);
213 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
214 region.put(put);
215 } catch (IOException ioe) {
216 fail();
218 long rollsCount = dodgyWAL.rolls.get();
219 try {
220 dodgyWAL.throwAppendException = true;
221 dodgyWAL.throwSyncException = false;
222 Put put = new Put(value);
223 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
224 region.put(put);
225 } catch (IOException ioe) {
226 threwOnAppend = true;
228 while (rollsCount == dodgyWAL.rolls.get()) {
229 Threads.sleep(100);
231 rollsCount = dodgyWAL.rolls.get();
233 // When we get to here.. we should be ok. A new WAL has been put in place. There were no
234 // appends to sync. We should be able to continue.
236 try {
237 dodgyWAL.throwAppendException = true;
238 dodgyWAL.throwSyncException = true;
239 Put put = new Put(value);
240 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
241 region.put(put);
242 } catch (IOException ioe) {
243 threwOnBoth = true;
245 while (rollsCount == dodgyWAL.rolls.get()) {
246 Threads.sleep(100);
249 // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
250 // to just continue.
252 // So, should be no abort at this stage. Verify.
253 Mockito.verify(services, Mockito.atLeast(0)).abort(Mockito.anyString(),
254 Mockito.any(Throwable.class));
255 try {
256 dodgyWAL.throwAppendException = false;
257 dodgyWAL.throwSyncException = true;
258 Put put = new Put(value);
259 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
260 region.put(put);
261 } catch (IOException ioe) {
262 threwOnSync = true;
264 // An append in the WAL but the sync failed is a server abort condition. That is our
265 // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
266 // happens. If it don't we'll timeout the whole test. That is fine.
267 while (true) {
268 try {
269 Mockito.verify(services, Mockito.atLeast(1)).abort(Mockito.anyString(),
270 Mockito.any(Throwable.class));
271 break;
272 } catch (WantedButNotInvoked t) {
273 Threads.sleep(1);
277 try {
278 dodgyWAL.throwAppendException = false;
279 dodgyWAL.throwSyncException = false;
280 dodgyWAL.throwArchiveException = true;
281 Pair<Path, Long> pair = new Pair<Path, Long>();
282 pair.setFirst(new Path("/a/b/"));
283 pair.setSecond(100L);
284 dodgyWAL.archive(pair);
285 } catch (Throwable ioe) {
287 while (true) {
288 try {
289 // one more abort needs to be called
290 Mockito.verify(services, Mockito.atLeast(2)).abort(Mockito.anyString(),
291 (Throwable) Mockito.anyObject());
292 break;
293 } catch (WantedButNotInvoked t) {
294 Threads.sleep(1);
297 } finally {
298 // To stop logRoller, its server has to say it is stopped.
299 Mockito.when(services.isStopped()).thenReturn(true);
300 if (logRoller != null) logRoller.close();
301 if (region != null) {
302 try {
303 region.close(true);
304 } catch (DroppedSnapshotException e) {
305 LOG.info("On way out; expected!", e);
308 if (dodgyWAL != null) dodgyWAL.close();
309 assertTrue("The regionserver should have thrown an exception", threwOnBoth);
310 assertTrue("The regionserver should have thrown an exception", threwOnAppend);
311 assertTrue("The regionserver should have thrown an exception", threwOnSync);
316 * @return A region on which you must call
317 * {@link HBaseTestingUtil#closeRegionAndWAL(HRegion)} when done.
319 public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
320 Configuration conf, WAL wal) throws IOException {
321 ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0,
322 0, null, MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
323 return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
324 Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);