HBASE-17532 Replaced explicit type with diamond operator
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / util / TestHBaseFsckOneRS.java
blob1d09dfac8756a577fcb747a7844616f68980e004
1 /**
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 package org.apache.hadoop.hbase.util;
22 import org.apache.commons.io.IOUtils;
23 import org.apache.hadoop.conf.Configuration;
24 import org.apache.hadoop.fs.FileStatus;
25 import org.apache.hadoop.fs.FileSystem;
26 import org.apache.hadoop.fs.Path;
27 import org.apache.hadoop.hbase.HColumnDescriptor;
28 import org.apache.hadoop.hbase.HConstants;
29 import org.apache.hadoop.hbase.HRegionInfo;
30 import org.apache.hadoop.hbase.HRegionLocation;
31 import org.apache.hadoop.hbase.HTableDescriptor;
32 import org.apache.hadoop.hbase.MetaTableAccessor;
33 import org.apache.hadoop.hbase.MiniHBaseCluster;
34 import org.apache.hadoop.hbase.ServerName;
35 import org.apache.hadoop.hbase.TableName;
36 import org.apache.hadoop.hbase.client.ClusterConnection;
37 import org.apache.hadoop.hbase.client.Connection;
38 import org.apache.hadoop.hbase.client.ConnectionFactory;
39 import org.apache.hadoop.hbase.client.Delete;
40 import org.apache.hadoop.hbase.client.Get;
41 import org.apache.hadoop.hbase.client.Put;
42 import org.apache.hadoop.hbase.client.RegionLocator;
43 import org.apache.hadoop.hbase.client.Result;
44 import org.apache.hadoop.hbase.client.Table;
45 import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
46 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
47 import org.apache.hadoop.hbase.io.hfile.TestHFile;
48 import org.apache.hadoop.hbase.master.AssignmentManager;
49 import org.apache.hadoop.hbase.master.RegionState;
50 import org.apache.hadoop.hbase.master.RegionStates;
51 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
52 import org.apache.hadoop.hbase.master.procedure.SplitTableRegionProcedure;
53 import org.apache.hadoop.hbase.regionserver.HRegion;
54 import org.apache.hadoop.hbase.regionserver.HRegionServer;
55 import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
56 import org.apache.hadoop.hbase.replication.ReplicationFactory;
57 import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
58 import org.apache.hadoop.hbase.replication.ReplicationQueues;
59 import org.apache.hadoop.hbase.replication.ReplicationQueuesArguments;
60 import org.apache.hadoop.hbase.testclassification.LargeTests;
61 import org.apache.hadoop.hbase.testclassification.MiscTests;
62 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
63 import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
64 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
65 import org.junit.AfterClass;
66 import org.junit.Assert;
67 import org.junit.Before;
68 import org.junit.BeforeClass;
69 import org.junit.Ignore;
70 import org.junit.Rule;
71 import org.junit.Test;
72 import org.junit.experimental.categories.Category;
73 import org.junit.rules.TestName;
75 import java.io.IOException;
76 import java.util.ArrayList;
77 import java.util.LinkedList;
78 import java.util.List;
79 import java.util.HashMap;
80 import java.util.Map;
81 import java.util.concurrent.Callable;
82 import java.util.concurrent.CountDownLatch;
83 import java.util.concurrent.ExecutorService;
84 import java.util.concurrent.Executors;
85 import java.util.concurrent.Future;
86 import java.util.concurrent.ScheduledThreadPoolExecutor;
87 import java.util.concurrent.SynchronousQueue;
88 import java.util.concurrent.ThreadPoolExecutor;
89 import java.util.concurrent.TimeUnit;
90 import java.util.concurrent.atomic.AtomicBoolean;
92 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.*;
93 import static org.junit.Assert.*;
95 @Category({MiscTests.class, LargeTests.class})
96 public class TestHBaseFsckOneRS extends BaseTestHBaseFsck {
97 @Rule
98 public TestName name = new TestName();
100 @BeforeClass
101 public static void setUpBeforeClass() throws Exception {
102 TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
103 MasterSyncObserver.class.getName());
105 conf.setInt("hbase.regionserver.handler.count", 2);
106 conf.setInt("hbase.regionserver.metahandler.count", 30);
108 conf.setInt("hbase.htable.threads.max", POOL_SIZE);
109 conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
110 conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
111 conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
112 TEST_UTIL.startMiniCluster(1);
114 tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
115 new SynchronousQueue<>(), Threads.newDaemonThreadFactory("testhbck"));
117 hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
119 AssignmentManager assignmentManager =
120 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
121 regionStates = assignmentManager.getRegionStates();
123 connection = (ClusterConnection) TEST_UTIL.getConnection();
125 admin = connection.getAdmin();
126 admin.setBalancerRunning(false, true);
128 TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
129 TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
132 @AfterClass
133 public static void tearDownAfterClass() throws Exception {
134 tableExecutorService.shutdown();
135 hbfsckExecutorService.shutdown();
136 admin.close();
137 TEST_UTIL.shutdownMiniCluster();
140 @Before
141 public void setUp() {
142 EnvironmentEdgeManager.reset();
147 * This creates a clean table and confirms that the table is clean.
149 @Test(timeout=180000)
150 public void testHBaseFsckClean() throws Exception {
151 assertNoErrors(doFsck(conf, false));
152 TableName table = TableName.valueOf("tableClean");
153 try {
154 HBaseFsck hbck = doFsck(conf, false);
155 assertNoErrors(hbck);
157 setupTable(table);
158 assertEquals(ROWKEYS.length, countRows());
160 // We created 1 table, should be fine
161 hbck = doFsck(conf, false);
162 assertNoErrors(hbck);
163 assertEquals(0, hbck.getOverlapGroups(table).size());
164 assertEquals(ROWKEYS.length, countRows());
165 } finally {
166 cleanupTable(table);
171 * Test thread pooling in the case where there are more regions than threads
173 @Test (timeout=180000)
174 public void testHbckThreadpooling() throws Exception {
175 final TableName tableName = TableName.valueOf(name.getMethodName());
176 try {
177 // Create table with 4 regions
178 setupTable(tableName);
180 // limit number of threads to 1.
181 Configuration newconf = new Configuration(conf);
182 newconf.setInt("hbasefsck.numthreads", 1);
183 assertNoErrors(doFsck(newconf, false));
185 // We should pass without triggering a RejectedExecutionException
186 } finally {
187 cleanupTable(tableName);
191 @Test (timeout=180000)
192 public void testTableWithNoRegions() throws Exception {
193 // We might end up with empty regions in a table
194 // see also testNoHdfsTable()
195 final TableName tableName = TableName.valueOf(name.getMethodName());
196 try {
197 // create table with one region
198 HTableDescriptor desc = new HTableDescriptor(tableName);
199 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
200 desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
201 createTable(TEST_UTIL, desc, null);
202 tbl = connection.getTable(tableName, tableExecutorService);
204 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
205 deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
206 HConstants.EMPTY_END_ROW, false, false, true);
208 HBaseFsck hbck = doFsck(conf, false);
209 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
210 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
212 doFsck(conf, true);
214 // fix hole
215 doFsck(conf, true);
217 // check that hole fixed
218 assertNoErrors(doFsck(conf, false));
219 } finally {
220 cleanupTable(tableName);
224 @Test (timeout=180000)
225 public void testHbckFixOrphanTable() throws Exception {
226 final TableName tableName = TableName.valueOf(name.getMethodName());
227 FileSystem fs = null;
228 Path tableinfo = null;
229 try {
230 setupTable(tableName);
232 Path hbaseTableDir = FSUtils.getTableDir(
233 FSUtils.getRootDir(conf), tableName);
234 fs = hbaseTableDir.getFileSystem(conf);
235 FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
236 tableinfo = status.getPath();
237 fs.rename(tableinfo, new Path("/.tableinfo"));
239 //to report error if .tableinfo is missing.
240 HBaseFsck hbck = doFsck(conf, false);
241 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
242 HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLEINFO_FILE });
244 // fix OrphanTable with default .tableinfo (htd not yet cached on master)
245 hbck = doFsck(conf, true);
246 assertNoErrors(hbck);
247 status = null;
248 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
249 assertNotNull(status);
251 HTableDescriptor htd = admin.getTableDescriptor(tableName);
252 htd.setValue("NOT_DEFAULT", "true");
253 admin.disableTable(tableName);
254 admin.modifyTable(tableName, htd);
255 admin.enableTable(tableName);
256 fs.delete(status.getPath(), true);
258 // fix OrphanTable with cache
259 htd = admin.getTableDescriptor(tableName); // warms up cached htd on master
260 hbck = doFsck(conf, true);
261 assertNoErrors(hbck);
262 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
263 assertNotNull(status);
264 htd = admin.getTableDescriptor(tableName);
265 assertEquals(htd.getValue("NOT_DEFAULT"), "true");
266 } finally {
267 if (fs != null) {
268 fs.rename(new Path("/.tableinfo"), tableinfo);
270 cleanupTable(tableName);
274 @Test (timeout=180000)
275 public void testReadOnlyProperty() throws Exception {
276 HBaseFsck hbck = doFsck(conf, false);
277 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
278 hbck.shouldIgnorePreCheckPermission());
280 hbck = doFsck(conf, true);
281 Assert.assertEquals("shouldIgnorePreCheckPermission", false,
282 hbck.shouldIgnorePreCheckPermission());
284 hbck = doFsck(conf, true);
285 hbck.setIgnorePreCheckPermission(true);
286 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
287 hbck.shouldIgnorePreCheckPermission());
291 * This creates and fixes a bad table where a region is completely contained
292 * by another region, and there is a hole (sort of like a bad split)
294 @Test (timeout=180000)
295 public void testOverlapAndOrphan() throws Exception {
296 final TableName tableName = TableName.valueOf(name.getMethodName());
297 try {
298 setupTable(tableName);
299 assertEquals(ROWKEYS.length, countRows());
301 // Mess it up by creating an overlap in the metadata
302 admin.disableTable(tableName);
303 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
304 true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
305 admin.enableTable(tableName);
307 HRegionInfo hriOverlap =
308 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
309 TEST_UTIL.assignRegion(hriOverlap);
311 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
312 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
314 HBaseFsck hbck = doFsck(conf, false);
315 assertErrors(hbck,
316 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
317 HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
318 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
319 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
321 // fix the problem.
322 doFsck(conf, true);
324 // verify that overlaps are fixed
325 HBaseFsck hbck2 = doFsck(conf,false);
326 assertNoErrors(hbck2);
327 assertEquals(0, hbck2.getOverlapGroups(tableName).size());
328 assertEquals(ROWKEYS.length, countRows());
329 } finally {
330 cleanupTable(tableName);
335 * This creates and fixes a bad table where a region overlaps two regions --
336 * a start key contained in another region and its end key is contained in
337 * yet another region.
339 @Test (timeout=180000)
340 public void testCoveredStartKey() throws Exception {
341 final TableName tableName = TableName.valueOf(name.getMethodName());
342 try {
343 setupTable(tableName);
344 assertEquals(ROWKEYS.length, countRows());
346 // Mess it up by creating an overlap in the metadata
347 HRegionInfo hriOverlap =
348 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
349 TEST_UTIL.assignRegion(hriOverlap);
351 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
352 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
354 HBaseFsck hbck = doFsck(conf, false);
355 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
356 HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
357 HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
358 assertEquals(3, hbck.getOverlapGroups(tableName).size());
359 assertEquals(ROWKEYS.length, countRows());
361 // fix the problem.
362 doFsck(conf, true);
364 // verify that overlaps are fixed
365 HBaseFsck hbck2 = doFsck(conf, false);
366 assertErrors(hbck2, new HBaseFsck.ErrorReporter.ERROR_CODE[0]);
367 assertEquals(0, hbck2.getOverlapGroups(tableName).size());
368 assertEquals(ROWKEYS.length, countRows());
369 } finally {
370 cleanupTable(tableName);
375 * This creates and fixes a bad table with a missing region -- hole in meta
376 * and data missing in the fs.
378 @Test (timeout=180000)
379 public void testRegionHole() throws Exception {
380 final TableName tableName = TableName.valueOf(name.getMethodName());
381 try {
382 setupTable(tableName);
383 assertEquals(ROWKEYS.length, countRows());
385 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
386 admin.disableTable(tableName);
387 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
388 true, true);
389 admin.enableTable(tableName);
391 HBaseFsck hbck = doFsck(conf, false);
392 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
393 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
394 // holes are separate from overlap groups
395 assertEquals(0, hbck.getOverlapGroups(tableName).size());
397 // fix hole
398 doFsck(conf, true);
400 // check that hole fixed
401 assertNoErrors(doFsck(conf,false));
402 assertEquals(ROWKEYS.length - 2, countRows()); // lost a region so lost a row
403 } finally {
404 cleanupTable(tableName);
409 * The region is not deployed when the table is disabled.
411 @Test (timeout=180000)
412 public void testRegionShouldNotBeDeployed() throws Exception {
413 final TableName tableName = TableName.valueOf(name.getMethodName());
414 try {
415 LOG.info("Starting testRegionShouldNotBeDeployed.");
416 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
417 assertTrue(cluster.waitForActiveAndReadyMaster());
420 byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
421 Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
422 HTableDescriptor htdDisabled = new HTableDescriptor(tableName);
423 htdDisabled.addFamily(new HColumnDescriptor(FAM));
425 // Write the .tableinfo
426 FSTableDescriptors fstd = new FSTableDescriptors(conf);
427 fstd.createTableDescriptor(htdDisabled);
428 List<HRegionInfo> disabledRegions =
429 TEST_UTIL.createMultiRegionsInMeta(conf, htdDisabled, SPLIT_KEYS);
431 // Let's just assign everything to first RS
432 HRegionServer hrs = cluster.getRegionServer(0);
434 // Create region files.
435 admin.disableTable(tableName);
436 admin.enableTable(tableName);
438 // Disable the table and close its regions
439 admin.disableTable(tableName);
440 HRegionInfo region = disabledRegions.remove(0);
441 byte[] regionName = region.getRegionName();
443 // The region should not be assigned currently
444 assertTrue(cluster.getServerWith(regionName) == -1);
446 // Directly open a region on a region server.
447 // If going through AM/ZK, the region won't be open.
448 // Even it is opened, AM will close it which causes
449 // flakiness of this test.
450 HRegion r = HRegion.openHRegion(
451 region, htdDisabled, hrs.getWAL(region), conf);
452 hrs.addToOnlineRegions(r);
454 HBaseFsck hbck = doFsck(conf, false);
455 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
456 HBaseFsck.ErrorReporter.ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
458 // fix this fault
459 doFsck(conf, true);
461 // check result
462 assertNoErrors(doFsck(conf, false));
463 } finally {
464 admin.enableTable(tableName);
465 cleanupTable(tableName);
470 * This test makes sure that parallel instances of Hbck is disabled.
472 * @throws Exception
474 @Test(timeout=180000)
475 public void testParallelHbck() throws Exception {
476 final ExecutorService service;
477 final Future<HBaseFsck> hbck1,hbck2;
479 class RunHbck implements Callable<HBaseFsck> {
480 boolean fail = true;
481 @Override
482 public HBaseFsck call(){
483 Configuration c = new Configuration(conf);
484 c.setInt("hbase.hbck.lockfile.attempts", 1);
485 // HBASE-13574 found that in HADOOP-2.6 and later, the create file would internally retry.
486 // To avoid flakiness of the test, set low max wait time.
487 c.setInt("hbase.hbck.lockfile.maxwaittime", 3);
488 try{
489 return doFsck(c, true); // Exclusive hbck only when fixing
490 } catch(Exception e){
491 if (e.getMessage().contains("Duplicate hbck")) {
492 fail = false;
495 // If we reach here, then an exception was caught
496 if (fail) fail();
497 return null;
500 service = Executors.newFixedThreadPool(2);
501 hbck1 = service.submit(new RunHbck());
502 hbck2 = service.submit(new RunHbck());
503 service.shutdown();
504 //wait for 15 seconds, for both hbck calls finish
505 service.awaitTermination(15, TimeUnit.SECONDS);
506 HBaseFsck h1 = hbck1.get();
507 HBaseFsck h2 = hbck2.get();
508 // Make sure only one of the calls was successful
509 assert(h1 == null || h2 == null);
510 if (h1 != null) {
511 assert(h1.getRetCode() >= 0);
513 if (h2 != null) {
514 assert(h2.getRetCode() >= 0);
519 * This test makes sure that with enough retries both parallel instances
520 * of hbck will be completed successfully.
522 * @throws Exception
524 @Test (timeout=180000)
525 public void testParallelWithRetriesHbck() throws Exception {
526 final ExecutorService service;
527 final Future<HBaseFsck> hbck1,hbck2;
529 // With the ExponentialBackoffPolicyWithLimit (starting with 200 milliseconds sleep time, and
530 // max sleep time of 5 seconds), we can retry around 15 times within 80 seconds before bail out.
532 // Note: the reason to use 80 seconds is that in HADOOP-2.6 and later, the create file would
533 // retry up to HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds). See HBASE-13574 for more
534 // details.
535 final int timeoutInSeconds = 80;
536 final int sleepIntervalInMilliseconds = 200;
537 final int maxSleepTimeInMilliseconds = 6000;
538 final int maxRetryAttempts = 15;
540 class RunHbck implements Callable<HBaseFsck>{
542 @Override
543 public HBaseFsck call() throws Exception {
544 // Increase retry attempts to make sure the non-active hbck doesn't get starved
545 Configuration c = new Configuration(conf);
546 c.setInt("hbase.hbck.lockfile.maxwaittime", timeoutInSeconds);
547 c.setInt("hbase.hbck.lockfile.attempt.sleep.interval", sleepIntervalInMilliseconds);
548 c.setInt("hbase.hbck.lockfile.attempt.maxsleeptime", maxSleepTimeInMilliseconds);
549 c.setInt("hbase.hbck.lockfile.attempts", maxRetryAttempts);
550 return doFsck(c, false);
554 service = Executors.newFixedThreadPool(2);
555 hbck1 = service.submit(new RunHbck());
556 hbck2 = service.submit(new RunHbck());
557 service.shutdown();
558 //wait for some time, for both hbck calls finish
559 service.awaitTermination(timeoutInSeconds * 2, TimeUnit.SECONDS);
560 HBaseFsck h1 = hbck1.get();
561 HBaseFsck h2 = hbck2.get();
562 // Both should be successful
563 assertNotNull(h1);
564 assertNotNull(h2);
565 assert(h1.getRetCode() >= 0);
566 assert(h2.getRetCode() >= 0);
570 @Test (timeout = 180000)
571 public void testRegionBoundariesCheck() throws Exception {
572 HBaseFsck hbck = doFsck(conf, false);
573 assertNoErrors(hbck); // no errors
574 try {
575 hbck.connect(); // need connection to have access to META
576 hbck.checkRegionBoundaries();
577 } catch (IllegalArgumentException e) {
578 if (e.getMessage().endsWith("not a valid DFS filename.")) {
579 fail("Table directory path is not valid." + e.getMessage());
581 } finally {
582 hbck.close();
587 * test region boundaries and make sure store file had been created.
588 * @throws Exception
590 @Test(timeout = 180000)
591 public void testRegionBoundariesCheckWithFlushTable() throws Exception {
592 HBaseFsck hbck = doFsck(conf, false);
593 assertNoErrors(hbck); // no errors
594 final TableName tableName = TableName.valueOf(name.getMethodName());
595 try {
596 setupTable(tableName);
597 admin.flush(tableName);
598 hbck.connect(); // need connection to have access to META
599 hbck.checkRegionBoundaries();
600 assertNoErrors(hbck); // no errors
601 } catch (IllegalArgumentException e) {
602 if (e.getMessage().endsWith("not a valid DFS filename.")) {
603 fail("Table directory path is not valid." + e.getMessage());
605 } finally {
606 hbck.close();
610 @Test (timeout=180000)
611 public void testHbckAfterRegionMerge() throws Exception {
612 final TableName tableName = TableName.valueOf(name.getMethodName());
613 Table meta = null;
614 try {
615 // disable CatalogJanitor
616 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
617 setupTable(tableName);
618 assertEquals(ROWKEYS.length, countRows());
620 try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
621 // make sure data in regions, if in wal only there is no data loss
622 admin.flush(tableName);
623 HRegionInfo region1 = rl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
624 HRegionInfo region2 = rl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
626 int regionCountBeforeMerge = rl.getAllRegionLocations().size();
628 assertNotEquals(region1, region2);
630 // do a region merge
631 admin.mergeRegionsAsync(
632 region1.getEncodedNameAsBytes(), region2.getEncodedNameAsBytes(), false);
634 // wait until region merged
635 long timeout = System.currentTimeMillis() + 30 * 1000;
636 while (true) {
637 if (rl.getAllRegionLocations().size() < regionCountBeforeMerge) {
638 break;
639 } else if (System.currentTimeMillis() > timeout) {
640 fail("Time out waiting on region " + region1.getEncodedName() + " and " + region2
641 .getEncodedName() + " be merged");
643 Thread.sleep(10);
646 assertEquals(ROWKEYS.length, countRows());
648 HBaseFsck hbck = doFsck(conf, false);
649 assertNoErrors(hbck); // no errors
652 } finally {
653 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
654 cleanupTable(tableName);
655 IOUtils.closeQuietly(meta);
659 * This creates entries in hbase:meta with no hdfs data. This should cleanly
660 * remove the table.
662 @Test (timeout=180000)
663 public void testNoHdfsTable() throws Exception {
664 final TableName tableName = TableName.valueOf(name.getMethodName());
665 setupTable(tableName);
666 assertEquals(ROWKEYS.length, countRows());
668 // make sure data in regions, if in wal only there is no data loss
669 admin.flush(tableName);
671 // Mess it up by deleting hdfs dirs
672 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
673 Bytes.toBytes("A"), false, false, true); // don't rm meta
674 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
675 Bytes.toBytes("B"), false, false, true); // don't rm meta
676 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
677 Bytes.toBytes("C"), false, false, true); // don't rm meta
678 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
679 Bytes.toBytes(""), false, false, true); // don't rm meta
681 // also remove the table directory in hdfs
682 deleteTableDir(tableName);
684 HBaseFsck hbck = doFsck(conf, false);
685 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
686 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
687 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
688 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
689 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
690 HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_TABLE_STATE, });
691 // holes are separate from overlap groups
692 assertEquals(0, hbck.getOverlapGroups(tableName).size());
694 // fix hole
695 doFsck(conf, true); // detect dangling regions and remove those
697 // check that hole fixed
698 assertNoErrors(doFsck(conf,false));
699 assertFalse("Table " + tableName + " should have been deleted", admin.tableExists(tableName));
703 * when the hbase.version file missing, It is fix the fault.
705 @Test (timeout=180000)
706 public void testNoVersionFile() throws Exception {
707 // delete the hbase.version file
708 Path rootDir = FSUtils.getRootDir(conf);
709 FileSystem fs = rootDir.getFileSystem(conf);
710 Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
711 fs.delete(versionFile, true);
713 // test
714 HBaseFsck hbck = doFsck(conf, false);
715 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
716 HBaseFsck.ErrorReporter.ERROR_CODE.NO_VERSION_FILE });
717 // fix hbase.version missing
718 doFsck(conf, true);
720 // no version file fixed
721 assertNoErrors(doFsck(conf, false));
724 @Test (timeout=180000)
725 public void testNoTableState() throws Exception {
726 // delete the hbase.version file
727 final TableName tableName = TableName.valueOf(name.getMethodName());
728 try {
729 setupTable(tableName);
730 // make sure data in regions, if in wal only there is no data loss
731 admin.flush(tableName);
733 MetaTableAccessor.deleteTableState(TEST_UTIL.getConnection(), tableName);
735 // test
736 HBaseFsck hbck = doFsck(conf, false);
737 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
738 HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLE_STATE });
739 // fix table state missing
740 doFsck(conf, true);
742 assertNoErrors(doFsck(conf, false));
743 assertTrue(TEST_UTIL.getAdmin().isTableEnabled(tableName));
744 } finally {
745 cleanupTable(tableName);
750 * This creates two tables and mess both of them and fix them one by one
752 @Test (timeout=180000)
753 public void testFixByTable() throws Exception {
754 final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
755 final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
756 try {
757 setupTable(tableName1);
758 // make sure data in regions, if in wal only there is no data loss
759 admin.flush(tableName1);
760 // Mess them up by leaving a hole in the hdfs data
761 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
762 Bytes.toBytes("C"), false, false, true); // don't rm meta
764 setupTable(tableName2);
765 // make sure data in regions, if in wal only there is no data loss
766 admin.flush(tableName2);
767 // Mess them up by leaving a hole in the hdfs data
768 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
769 false, true); // don't rm meta
771 HBaseFsck hbck = doFsck(conf, false);
772 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
773 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
774 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
776 // fix hole in table 1
777 doFsck(conf, true, tableName1);
778 // check that hole in table 1 fixed
779 assertNoErrors(doFsck(conf, false, tableName1));
780 // check that hole in table 2 still there
781 assertErrors(doFsck(conf, false, tableName2), new HBaseFsck.ErrorReporter.ERROR_CODE[] {
782 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
784 // fix hole in table 2
785 doFsck(conf, true, tableName2);
786 // check that hole in both tables fixed
787 assertNoErrors(doFsck(conf, false));
788 assertEquals(ROWKEYS.length - 2, countRows());
789 } finally {
790 cleanupTable(tableName1);
791 cleanupTable(tableName2);
795 * A split parent in meta, in hdfs, and not deployed
797 @Test (timeout=180000)
798 public void testLingeringSplitParent() throws Exception {
799 final TableName tableName = TableName.valueOf(name.getMethodName());
800 Table meta = null;
801 try {
802 setupTable(tableName);
803 assertEquals(ROWKEYS.length, countRows());
805 // make sure data in regions, if in wal only there is no data loss
806 admin.flush(tableName);
808 HRegionLocation location;
809 try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
810 location = rl.getRegionLocation(Bytes.toBytes("B"));
813 // Delete one region from meta, but not hdfs, unassign it.
814 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
815 Bytes.toBytes("C"), true, true, false);
817 // Create a new meta entry to fake it as a split parent.
818 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
819 HRegionInfo hri = location.getRegionInfo();
821 HRegionInfo a = new HRegionInfo(tbl.getName(),
822 Bytes.toBytes("B"), Bytes.toBytes("BM"));
823 HRegionInfo b = new HRegionInfo(tbl.getName(),
824 Bytes.toBytes("BM"), Bytes.toBytes("C"));
826 hri.setOffline(true);
827 hri.setSplit(true);
829 MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
830 meta.close();
831 admin.flush(TableName.META_TABLE_NAME);
833 HBaseFsck hbck = doFsck(conf, false);
834 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
835 HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT,
836 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
838 // regular repair cannot fix lingering split parent
839 hbck = doFsck(conf, true);
840 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
841 HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT,
842 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
843 assertFalse(hbck.shouldRerun());
844 hbck = doFsck(conf, false);
845 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
846 HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT,
847 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
849 // fix lingering split parent
850 hbck = new HBaseFsck(conf, hbfsckExecutorService);
851 hbck.connect();
852 HBaseFsck.setDisplayFullReport(); // i.e. -details
853 hbck.setTimeLag(0);
854 hbck.setFixSplitParents(true);
855 hbck.onlineHbck();
856 assertTrue(hbck.shouldRerun());
857 hbck.close();
859 Get get = new Get(hri.getRegionName());
860 Result result = meta.get(get);
861 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
862 HConstants.SPLITA_QUALIFIER).isEmpty());
863 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
864 HConstants.SPLITB_QUALIFIER).isEmpty());
865 admin.flush(TableName.META_TABLE_NAME);
867 // fix other issues
868 doFsck(conf, true);
870 // check that all are fixed
871 assertNoErrors(doFsck(conf, false));
872 assertEquals(ROWKEYS.length, countRows());
873 } finally {
874 cleanupTable(tableName);
875 IOUtils.closeQuietly(meta);
880 * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
881 * valid cases where the daughters are there.
883 @Test (timeout=180000)
884 public void testValidLingeringSplitParent() throws Exception {
885 final TableName tableName = TableName.valueOf(name.getMethodName());
886 Table meta = null;
887 try {
888 setupTable(tableName);
889 assertEquals(ROWKEYS.length, countRows());
891 // make sure data in regions, if in wal only there is no data loss
892 admin.flush(tableName);
894 try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
895 HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B"));
897 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
898 HRegionInfo hri = location.getRegionInfo();
900 // do a regular split
901 byte[] regionName = location.getRegionInfo().getRegionName();
902 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
903 TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
905 // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
906 // for some time until children references are deleted. HBCK erroneously sees this as
907 // overlapping regions
908 HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, true,
909 false, false, false, null);
910 // no LINGERING_SPLIT_PARENT reported
911 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
913 // assert that the split hbase:meta entry is still there.
914 Get get = new Get(hri.getRegionName());
915 Result result = meta.get(get);
916 assertNotNull(result);
917 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
919 assertEquals(ROWKEYS.length, countRows());
921 // assert that we still have the split regions
922 //SPLITS + 1 is # regions pre-split.
923 assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1);
924 assertNoErrors(doFsck(conf, false));
926 } finally {
927 cleanupTable(tableName);
928 IOUtils.closeQuietly(meta);
933 * Split crashed after write to hbase:meta finished for the parent region, but
934 * failed to write daughters (pre HBASE-7721 codebase)
936 @Test(timeout=75000)
937 public void testSplitDaughtersNotInMeta() throws Exception {
938 final TableName tableName = TableName.valueOf(name.getMethodName());
939 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
940 try {
941 setupTable(tableName);
942 assertEquals(ROWKEYS.length, countRows());
944 // make sure data in regions, if in wal only there is no data loss
945 admin.flush(tableName);
947 try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
948 HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B"));
950 HRegionInfo hri = location.getRegionInfo();
952 // Disable CatalogJanitor to prevent it from cleaning up the parent region
953 // after split.
954 admin.enableCatalogJanitor(false);
956 // do a regular split
957 byte[] regionName = location.getRegionInfo().getRegionName();
958 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
959 TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
961 PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions(
962 meta.get(new Get(regionName)));
964 // Delete daughter regions from meta, but not hdfs, unassign it.
966 ServerName firstSN =
967 rl.getRegionLocation(daughters.getFirst().getStartKey()).getServerName();
968 ServerName secondSN =
969 rl.getRegionLocation(daughters.getSecond().getStartKey()).getServerName();
971 undeployRegion(connection, firstSN, daughters.getFirst());
972 undeployRegion(connection, secondSN, daughters.getSecond());
974 List<Delete> deletes = new ArrayList<>(2);
975 deletes.add(new Delete(daughters.getFirst().getRegionName()));
976 deletes.add(new Delete(daughters.getSecond().getRegionName()));
977 meta.delete(deletes);
979 // Remove daughters from regionStates
980 RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster().
981 getAssignmentManager().getRegionStates();
982 regionStates.deleteRegion(daughters.getFirst());
983 regionStates.deleteRegion(daughters.getSecond());
985 HBaseFsck hbck = doFsck(conf, false);
986 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
987 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
988 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
989 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); //no LINGERING_SPLIT_PARENT
991 // now fix it. The fix should not revert the region split, but add daughters to META
992 hbck = doFsck(conf, true, true, false, false, false, false, false, false, false,
993 false, false, false, null);
994 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
995 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
996 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
997 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
999 // assert that the split hbase:meta entry is still there.
1000 Get get = new Get(hri.getRegionName());
1001 Result result = meta.get(get);
1002 assertNotNull(result);
1003 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
1005 assertEquals(ROWKEYS.length, countRows());
1007 // assert that we still have the split regions
1008 assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions
1009 // pre-split.
1010 assertNoErrors(doFsck(conf, false)); //should be fixed by now
1012 } finally {
1013 admin.enableCatalogJanitor(true);
1014 meta.close();
1015 cleanupTable(tableName);
1020 * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
1021 * meta and data missing in the fs.
1023 @Test(timeout=120000)
1024 public void testMissingFirstRegion() throws Exception {
1025 final TableName tableName = TableName.valueOf(name.getMethodName());
1026 try {
1027 setupTable(tableName);
1028 assertEquals(ROWKEYS.length, countRows());
1030 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1031 admin.disableTable(tableName);
1032 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1033 true, true);
1034 admin.enableTable(tableName);
1036 HBaseFsck hbck = doFsck(conf, false);
1037 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1038 HBaseFsck.ErrorReporter.ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1039 // fix hole
1040 doFsck(conf, true);
1041 // check that hole fixed
1042 assertNoErrors(doFsck(conf, false));
1043 } finally {
1044 cleanupTable(tableName);
1049 * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
1050 * meta and data missing in the fs.
1052 @Test(timeout=120000)
1053 public void testRegionDeployedNotInHdfs() throws Exception {
1054 final TableName tableName = TableName.valueOf(name.getMethodName());
1055 try {
1056 setupTable(tableName);
1057 admin.flush(tableName);
1059 // Mess it up by deleting region dir
1060 deleteRegion(conf, tbl.getTableDescriptor(),
1061 HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
1062 false, true);
1064 HBaseFsck hbck = doFsck(conf, false);
1065 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1066 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
1067 // fix hole
1068 doFsck(conf, true);
1069 // check that hole fixed
1070 assertNoErrors(doFsck(conf, false));
1071 } finally {
1072 cleanupTable(tableName);
1077 * This creates and fixes a bad table with missing last region -- hole in meta and data missing in
1078 * the fs.
1080 @Test(timeout=120000)
1081 public void testMissingLastRegion() throws Exception {
1082 final TableName tableName = TableName.valueOf(name.getMethodName());
1083 try {
1084 setupTable(tableName);
1085 assertEquals(ROWKEYS.length, countRows());
1087 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1088 admin.disableTable(tableName);
1089 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1090 true, true);
1091 admin.enableTable(tableName);
1093 HBaseFsck hbck = doFsck(conf, false);
1094 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1095 HBaseFsck.ErrorReporter.ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1096 // fix hole
1097 doFsck(conf, true);
1098 // check that hole fixed
1099 assertNoErrors(doFsck(conf, false));
1100 } finally {
1101 cleanupTable(tableName);
1106 * Test -noHdfsChecking option can detect and fix assignments issue.
1108 @Test (timeout=180000)
1109 public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1110 final TableName tableName = TableName.valueOf(name.getMethodName());
1111 try {
1112 setupTable(tableName);
1113 assertEquals(ROWKEYS.length, countRows());
1115 // Mess it up by closing a region
1116 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
1117 false, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
1119 // verify there is no other errors
1120 HBaseFsck hbck = doFsck(conf, false);
1121 assertErrors(hbck,
1122 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1123 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED,
1124 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1126 // verify that noHdfsChecking report the same errors
1127 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1128 fsck.connect();
1129 HBaseFsck.setDisplayFullReport(); // i.e. -details
1130 fsck.setTimeLag(0);
1131 fsck.setCheckHdfs(false);
1132 fsck.onlineHbck();
1133 assertErrors(fsck,
1134 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1135 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED,
1136 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1137 fsck.close();
1139 // verify that fixAssignments works fine with noHdfsChecking
1140 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1141 fsck.connect();
1142 HBaseFsck.setDisplayFullReport(); // i.e. -details
1143 fsck.setTimeLag(0);
1144 fsck.setCheckHdfs(false);
1145 fsck.setFixAssignments(true);
1146 fsck.onlineHbck();
1147 assertTrue(fsck.shouldRerun());
1148 fsck.onlineHbck();
1149 assertNoErrors(fsck);
1151 assertEquals(ROWKEYS.length, countRows());
1153 fsck.close();
1154 } finally {
1155 cleanupTable(tableName);
1160 * Test -noHdfsChecking option can detect region is not in meta but deployed.
1161 * However, it can not fix it without checking Hdfs because we need to get
1162 * the region info from Hdfs in this case, then to patch the meta.
1164 @Test (timeout=180000)
1165 public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1166 final TableName tableName = TableName.valueOf(name.getMethodName());
1167 try {
1168 setupTable(tableName);
1169 assertEquals(ROWKEYS.length, countRows());
1171 // Mess it up by deleting a region from the metadata
1172 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1173 Bytes.toBytes("B"), false, true, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
1175 // verify there is no other errors
1176 HBaseFsck hbck = doFsck(conf, false);
1177 assertErrors(hbck,
1178 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1179 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META,
1180 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1182 // verify that noHdfsChecking report the same errors
1183 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1184 fsck.connect();
1185 HBaseFsck.setDisplayFullReport(); // i.e. -details
1186 fsck.setTimeLag(0);
1187 fsck.setCheckHdfs(false);
1188 fsck.onlineHbck();
1189 assertErrors(fsck,
1190 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1191 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META,
1192 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1193 fsck.close();
1195 // verify that fixMeta doesn't work with noHdfsChecking
1196 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1197 fsck.connect();
1198 HBaseFsck.setDisplayFullReport(); // i.e. -details
1199 fsck.setTimeLag(0);
1200 fsck.setCheckHdfs(false);
1201 fsck.setFixAssignments(true);
1202 fsck.setFixMeta(true);
1203 fsck.onlineHbck();
1204 assertFalse(fsck.shouldRerun());
1205 assertErrors(fsck,
1206 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1207 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META,
1208 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1209 fsck.close();
1211 // fix the cluster so other tests won't be impacted
1212 fsck = doFsck(conf, true);
1213 assertTrue(fsck.shouldRerun());
1214 fsck = doFsck(conf, true);
1215 assertNoErrors(fsck);
1216 } finally {
1217 cleanupTable(tableName);
1222 * Test -fixHdfsHoles doesn't work with -noHdfsChecking option,
1223 * and -noHdfsChecking can't detect orphan Hdfs region.
1225 @Test (timeout=180000)
1226 public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1227 final TableName tableName = TableName.valueOf(name.getMethodName());
1228 try {
1229 setupTable(tableName);
1230 assertEquals(ROWKEYS.length, countRows());
1232 // Mess it up by creating an overlap in the metadata
1233 admin.disableTable(tableName);
1234 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
1235 true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
1236 admin.enableTable(tableName);
1238 HRegionInfo hriOverlap =
1239 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
1240 TEST_UTIL.assignRegion(hriOverlap);
1242 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1243 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1245 HBaseFsck hbck = doFsck(conf, false);
1246 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1247 HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
1248 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1249 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
1251 // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
1252 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1253 fsck.connect();
1254 HBaseFsck.setDisplayFullReport(); // i.e. -details
1255 fsck.setTimeLag(0);
1256 fsck.setCheckHdfs(false);
1257 fsck.onlineHbck();
1258 assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1259 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1260 fsck.close();
1262 // verify that fixHdfsHoles doesn't work with noHdfsChecking
1263 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1264 fsck.connect();
1265 HBaseFsck.setDisplayFullReport(); // i.e. -details
1266 fsck.setTimeLag(0);
1267 fsck.setCheckHdfs(false);
1268 fsck.setFixHdfsHoles(true);
1269 fsck.setFixHdfsOverlaps(true);
1270 fsck.setFixHdfsOrphans(true);
1271 fsck.onlineHbck();
1272 assertFalse(fsck.shouldRerun());
1273 assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1274 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1275 fsck.close();
1276 } finally {
1277 if (admin.isTableDisabled(tableName)) {
1278 admin.enableTable(tableName);
1280 cleanupTable(tableName);
1285 * This creates a table and then corrupts an hfile. Hbck should quarantine the file.
1287 @Test(timeout=180000)
1288 public void testQuarantineCorruptHFile() throws Exception {
1289 final TableName tableName = TableName.valueOf(name.getMethodName());
1290 try {
1291 setupTable(tableName);
1292 assertEquals(ROWKEYS.length, countRows());
1293 admin.flush(tableName); // flush is async.
1295 FileSystem fs = FileSystem.get(conf);
1296 Path hfile = getFlushedHFile(fs, tableName);
1298 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1299 admin.disableTable(tableName);
1301 // create new corrupt file called deadbeef (valid hfile name)
1302 Path corrupt = new Path(hfile.getParent(), "deadbeef");
1303 TestHFile.truncateFile(fs, hfile, corrupt);
1304 LOG.info("Created corrupted file " + corrupt);
1305 HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
1307 // we cannot enable here because enable never finished due to the corrupt region.
1308 HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, tableName);
1309 assertEquals(res.getRetCode(), 0);
1310 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
1311 assertEquals(hfcc.getHFilesChecked(), 5);
1312 assertEquals(hfcc.getCorrupted().size(), 1);
1313 assertEquals(hfcc.getFailures().size(), 0);
1314 assertEquals(hfcc.getQuarantined().size(), 1);
1315 assertEquals(hfcc.getMissing().size(), 0);
1317 // Its been fixed, verify that we can enable.
1318 admin.enableTable(tableName);
1319 } finally {
1320 cleanupTable(tableName);
1325 * This creates a table and simulates the race situation where a concurrent compaction or split
1326 * has removed an hfile after the corruption checker learned about it.
1328 @Test(timeout=180000)
1329 public void testQuarantineMissingHFile() throws Exception {
1330 final TableName tableName = TableName.valueOf(name.getMethodName());
1332 // inject a fault in the hfcc created.
1333 final FileSystem fs = FileSystem.get(conf);
1334 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
1335 @Override
1336 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
1337 throws IOException {
1338 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1339 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1340 @Override
1341 protected void checkHFile(Path p) throws IOException {
1342 if (attemptedFirstHFile.compareAndSet(false, true)) {
1343 assertTrue(fs.delete(p, true)); // make sure delete happened.
1345 super.checkHFile(p);
1350 doQuarantineTest(tableName, hbck, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing.
1351 hbck.close();
1355 * This creates and fixes a bad table with regions that has startkey == endkey
1357 @Test (timeout=180000)
1358 public void testDegenerateRegions() throws Exception {
1359 final TableName tableName = TableName.valueOf(name.getMethodName());
1360 try {
1361 setupTable(tableName);
1362 assertNoErrors(doFsck(conf, false));
1363 assertEquals(ROWKEYS.length, countRows());
1365 // Now let's mess it up, by adding a region with a duplicate startkey
1366 HRegionInfo hriDupe =
1367 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
1368 TEST_UTIL.assignRegion(hriDupe);
1370 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
1371 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
1373 HBaseFsck hbck = doFsck(conf,false);
1374 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1375 HBaseFsck.ErrorReporter.ERROR_CODE.DEGENERATE_REGION,
1376 HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
1377 HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS });
1378 assertEquals(2, hbck.getOverlapGroups(tableName).size());
1379 assertEquals(ROWKEYS.length, countRows());
1381 // fix the degenerate region.
1382 doFsck(conf, true);
1384 // check that the degenerate region is gone and no data loss
1385 HBaseFsck hbck2 = doFsck(conf,false);
1386 assertNoErrors(hbck2);
1387 assertEquals(0, hbck2.getOverlapGroups(tableName).size());
1388 assertEquals(ROWKEYS.length, countRows());
1389 } finally {
1390 cleanupTable(tableName);
1395 * Test mission REGIONINFO_QUALIFIER in hbase:meta
1397 @Test (timeout=180000)
1398 public void testMissingRegionInfoQualifier() throws Exception {
1399 Connection connection = ConnectionFactory.createConnection(conf);
1400 final TableName tableName = TableName.valueOf(name.getMethodName());
1401 try {
1402 setupTable(tableName);
1404 // Mess it up by removing the RegionInfo for one region.
1405 final List<Delete> deletes = new LinkedList<>();
1406 Table meta = connection.getTable(TableName.META_TABLE_NAME, hbfsckExecutorService);
1407 MetaTableAccessor.fullScanRegions(connection, new MetaTableAccessor.Visitor() {
1409 @Override
1410 public boolean visit(Result rowResult) throws IOException {
1411 HRegionInfo hri = MetaTableAccessor.getHRegionInfo(rowResult);
1412 if (hri != null && !hri.getTable().isSystemTable()) {
1413 Delete delete = new Delete(rowResult.getRow());
1414 delete.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
1415 deletes.add(delete);
1417 return true;
1420 meta.delete(deletes);
1422 // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
1423 meta.put(new Put(Bytes.toBytes(tableName + ",,1361911384013.810e28f59a57da91c66"))
1424 .addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
1425 Bytes.toBytes("node1:60020")));
1426 meta.put(new Put(Bytes.toBytes(tableName + ",,1361911384013.810e28f59a57da91c66"))
1427 .addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
1428 Bytes.toBytes(1362150791183L)));
1429 meta.close();
1431 HBaseFsck hbck = doFsck(conf, false);
1432 assertTrue(hbck.getErrors().getErrorList().contains(
1433 HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
1435 // fix reference file
1436 hbck = doFsck(conf, true);
1438 // check that reference file fixed
1439 assertFalse(hbck.getErrors().getErrorList().contains(
1440 HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
1441 } finally {
1442 cleanupTable(tableName);
1444 connection.close();
1448 * Test pluggable error reporter. It can be plugged in
1449 * from system property or configuration.
1451 @Test (timeout=180000)
1452 public void testErrorReporter() throws Exception {
1453 try {
1454 MockErrorReporter.calledCount = 0;
1455 doFsck(conf, false);
1456 assertEquals(MockErrorReporter.calledCount, 0);
1458 conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
1459 doFsck(conf, false);
1460 assertTrue(MockErrorReporter.calledCount > 20);
1461 } finally {
1462 conf.set("hbasefsck.errorreporter",
1463 HBaseFsck.PrintingErrorReporter.class.getName());
1464 MockErrorReporter.calledCount = 0;
1468 @Test(timeout=180000)
1469 public void testCheckReplication() throws Exception {
1470 // check no errors
1471 HBaseFsck hbck = doFsck(conf, false);
1472 assertNoErrors(hbck);
1474 // create peer
1475 ReplicationAdmin replicationAdmin = new ReplicationAdmin(conf);
1476 Assert.assertEquals(0, replicationAdmin.getPeersCount());
1477 int zkPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT,
1478 HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT);
1479 ReplicationPeerConfig rpc = new ReplicationPeerConfig();
1480 rpc.setClusterKey("127.0.0.1:" + zkPort + ":/hbase");
1481 replicationAdmin.addPeer("1", rpc, null);
1482 replicationAdmin.getPeersCount();
1483 Assert.assertEquals(1, replicationAdmin.getPeersCount());
1485 // create replicator
1486 ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "Test Hbase Fsck", connection);
1487 ReplicationQueues repQueues =
1488 ReplicationFactory.getReplicationQueues(new ReplicationQueuesArguments(conf, connection,
1489 zkw));
1490 repQueues.init("server1");
1491 // queues for current peer, no errors
1492 repQueues.addLog("1", "file1");
1493 repQueues.addLog("1-server2", "file1");
1494 Assert.assertEquals(2, repQueues.getAllQueues().size());
1495 hbck = doFsck(conf, false);
1496 assertNoErrors(hbck);
1498 // queues for removed peer
1499 repQueues.addLog("2", "file1");
1500 repQueues.addLog("2-server2", "file1");
1501 Assert.assertEquals(4, repQueues.getAllQueues().size());
1502 hbck = doFsck(conf, false);
1503 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1504 HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE,
1505 HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE });
1507 // fix the case
1508 hbck = doFsck(conf, true);
1509 hbck = doFsck(conf, false);
1510 assertNoErrors(hbck);
1511 // ensure only "2" is deleted
1512 Assert.assertEquals(2, repQueues.getAllQueues().size());
1513 Assert.assertNull(repQueues.getLogsInQueue("2"));
1514 Assert.assertNull(repQueues.getLogsInQueue("2-sever2"));
1516 replicationAdmin.removePeer("1");
1517 repQueues.removeAllQueues();
1518 zkw.close();
1519 replicationAdmin.close();
1523 * This creates and fixes a bad table with a missing region -- hole in meta
1524 * and data present but .regioninfo missing (an orphan hdfs region)in the fs.
1526 @Test(timeout=180000)
1527 public void testHDFSRegioninfoMissing() throws Exception {
1528 final TableName tableName = TableName.valueOf(name.getMethodName());
1529 try {
1530 setupTable(tableName);
1531 assertEquals(ROWKEYS.length, countRows());
1533 // Mess it up by leaving a hole in the meta data
1534 admin.disableTable(tableName);
1535 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
1536 true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
1537 admin.enableTable(tableName);
1539 HBaseFsck hbck = doFsck(conf, false);
1540 assertErrors(hbck,
1541 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1542 HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
1543 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1544 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1545 // holes are separate from overlap groups
1546 assertEquals(0, hbck.getOverlapGroups(tableName).size());
1548 // fix hole
1549 doFsck(conf, true);
1551 // check that hole fixed
1552 assertNoErrors(doFsck(conf, false));
1553 assertEquals(ROWKEYS.length, countRows());
1554 } finally {
1555 cleanupTable(tableName);
1560 * This creates and fixes a bad table with a region that is missing meta and
1561 * not assigned to a region server.
1563 @Test (timeout=180000)
1564 public void testNotInMetaOrDeployedHole() throws Exception {
1565 final TableName tableName = TableName.valueOf(name.getMethodName());
1566 try {
1567 setupTable(tableName);
1568 assertEquals(ROWKEYS.length, countRows());
1570 // Mess it up by leaving a hole in the meta data
1571 admin.disableTable(tableName);
1572 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
1573 true, false); // don't rm from fs
1574 admin.enableTable(tableName);
1576 HBaseFsck hbck = doFsck(conf, false);
1577 assertErrors(hbck,
1578 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1579 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1580 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1581 // holes are separate from overlap groups
1582 assertEquals(0, hbck.getOverlapGroups(tableName).size());
1584 // fix hole
1585 assertErrors(doFsck(conf, true),
1586 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1587 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1588 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1590 // check that hole fixed
1591 assertNoErrors(doFsck(conf, false));
1592 assertEquals(ROWKEYS.length, countRows());
1593 } finally {
1594 cleanupTable(tableName);
1598 @Test (timeout=180000)
1599 public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
1600 final TableName tableName = TableName.valueOf(name.getMethodName());
1601 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1602 try {
1603 HTableDescriptor desc = new HTableDescriptor(tableName);
1604 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
1605 createTable(TEST_UTIL, desc, null);
1607 tbl = connection.getTable(desc.getTableName());
1608 for (int i = 0; i < 5; i++) {
1609 Put p1 = new Put(("r" + i).getBytes());
1610 p1.addColumn(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
1611 tbl.put(p1);
1613 admin.flush(desc.getTableName());
1614 List<HRegion> regions = cluster.getRegions(desc.getTableName());
1615 int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
1616 HRegionServer regionServer = cluster.getRegionServer(serverWith);
1617 byte[] parentRegionName = regions.get(0).getRegionInfo().getRegionName();
1618 cluster.getServerWith(parentRegionName);
1619 // Create daughters without adding to META table
1620 MasterProcedureEnv env = cluster.getMaster().getMasterProcedureExecutor().getEnvironment();
1621 SplitTableRegionProcedure splitR = new SplitTableRegionProcedure(
1622 env, regions.get(0).getRegionInfo(), Bytes.toBytes("r3"));
1623 splitR.prepareSplitRegion(env);
1624 splitR.setRegionStateToSplitting(env);
1625 splitR.closeParentRegionForSplit(env);
1626 splitR.createDaughterRegions(env);
1628 AssignmentManager am = cluster.getMaster().getAssignmentManager();
1629 for (RegionState state : am.getRegionStates().getRegionsInTransition()) {
1630 am.regionOffline(state.getRegion());
1633 Map<HRegionInfo, ServerName> regionsMap = new HashMap<>();
1634 regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
1635 am.assign(regionsMap);
1636 am.waitForAssignment(regions.get(0).getRegionInfo());
1637 HBaseFsck hbck = doFsck(conf, false);
1638 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1639 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1640 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
1641 // holes are separate from overlap groups
1642 assertEquals(0, hbck.getOverlapGroups(tableName).size());
1644 // fix hole
1645 assertErrors(
1646 doFsck(conf, false, true, false, false, false, false, false, false, false, false, false,
1647 false, null),
1648 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1649 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1650 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
1652 // check that hole fixed
1653 assertNoErrors(doFsck(conf, false));
1654 assertEquals(5, countRows());
1655 } finally {
1656 if (tbl != null) {
1657 tbl.close();
1658 tbl = null;
1660 cleanupTable(tableName);
1665 * This creates fixes a bad table with a hole in meta.
1667 @Test (timeout=180000)
1668 public void testNotInMetaHole() throws Exception {
1669 final TableName tableName = TableName.valueOf(name.getMethodName());
1670 try {
1671 setupTable(tableName);
1672 assertEquals(ROWKEYS.length, countRows());
1674 // Mess it up by leaving a hole in the meta data
1675 admin.disableTable(tableName);
1676 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
1677 true, false); // don't rm from fs
1678 admin.enableTable(tableName);
1680 HBaseFsck hbck = doFsck(conf, false);
1681 assertErrors(hbck,
1682 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1683 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1684 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1685 // holes are separate from overlap groups
1686 assertEquals(0, hbck.getOverlapGroups(tableName).size());
1688 // fix hole
1689 assertErrors(doFsck(conf, true),
1690 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1691 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1692 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1694 // check that hole fixed
1695 assertNoErrors(doFsck(conf, false));
1696 assertEquals(ROWKEYS.length, countRows());
1697 } finally {
1698 cleanupTable(tableName);
1703 * This creates and fixes a bad table with a region that is in meta but has
1704 * no deployment or data hdfs
1706 @Test (timeout=180000)
1707 public void testNotInHdfs() throws Exception {
1708 final TableName tableName = TableName.valueOf(name.getMethodName());
1709 try {
1710 setupTable(tableName);
1711 assertEquals(ROWKEYS.length, countRows());
1713 // make sure data in regions, if in wal only there is no data loss
1714 admin.flush(tableName);
1716 // Mess it up by leaving a hole in the hdfs data
1717 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
1718 false, true); // don't rm meta
1720 HBaseFsck hbck = doFsck(conf, false);
1721 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1722 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS});
1723 // holes are separate from overlap groups
1724 assertEquals(0, hbck.getOverlapGroups(tableName).size());
1726 // fix hole
1727 doFsck(conf, true);
1729 // check that hole fixed
1730 assertNoErrors(doFsck(conf,false));
1731 assertEquals(ROWKEYS.length - 2, countRows());
1732 } finally {
1733 cleanupTable(tableName);
1738 * This creates a table and simulates the race situation where a concurrent compaction or split
1739 * has removed an colfam dir before the corruption checker got to it.
1741 // Disabled because fails sporadically. Is this test right? Timing-wise, there could be no
1742 // files in a column family on initial creation -- as suggested by Matteo.
1743 @Ignore
1744 @Test(timeout=180000)
1745 public void testQuarantineMissingFamdir() throws Exception {
1746 final TableName tableName = TableName.valueOf(name.getMethodName());
1747 // inject a fault in the hfcc created.
1748 final FileSystem fs = FileSystem.get(conf);
1749 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
1750 @Override
1751 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
1752 throws IOException {
1753 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1754 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1755 @Override
1756 protected void checkColFamDir(Path p) throws IOException {
1757 if (attemptedFirstHFile.compareAndSet(false, true)) {
1758 assertTrue(fs.delete(p, true)); // make sure delete happened.
1760 super.checkColFamDir(p);
1765 doQuarantineTest(tableName, hbck, 3, 0, 0, 0, 1);
1766 hbck.close();
1770 * This creates a table and simulates the race situation where a concurrent compaction or split
1771 * has removed a region dir before the corruption checker got to it.
1773 @Test(timeout=180000)
1774 public void testQuarantineMissingRegionDir() throws Exception {
1775 final TableName tableName = TableName.valueOf(name.getMethodName());
1776 // inject a fault in the hfcc created.
1777 final FileSystem fs = FileSystem.get(conf);
1778 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
1779 @Override
1780 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
1781 throws IOException {
1782 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
1783 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
1784 @Override
1785 protected void checkRegionDir(Path p) throws IOException {
1786 if (attemptedFirstHFile.compareAndSet(false, true)) {
1787 assertTrue(fs.delete(p, true)); // make sure delete happened.
1789 super.checkRegionDir(p);
1794 doQuarantineTest(tableName, hbck, 3, 0, 0, 0, 1);
1795 hbck.close();