/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 package org
.apache
.hadoop
.hbase
.util
;
22 import org
.apache
.commons
.io
.IOUtils
;
23 import org
.apache
.hadoop
.conf
.Configuration
;
24 import org
.apache
.hadoop
.fs
.FileStatus
;
25 import org
.apache
.hadoop
.fs
.FileSystem
;
26 import org
.apache
.hadoop
.fs
.Path
;
27 import org
.apache
.hadoop
.hbase
.HColumnDescriptor
;
28 import org
.apache
.hadoop
.hbase
.HConstants
;
29 import org
.apache
.hadoop
.hbase
.HRegionInfo
;
30 import org
.apache
.hadoop
.hbase
.HRegionLocation
;
31 import org
.apache
.hadoop
.hbase
.HTableDescriptor
;
32 import org
.apache
.hadoop
.hbase
.MetaTableAccessor
;
33 import org
.apache
.hadoop
.hbase
.MiniHBaseCluster
;
34 import org
.apache
.hadoop
.hbase
.ServerName
;
35 import org
.apache
.hadoop
.hbase
.TableName
;
36 import org
.apache
.hadoop
.hbase
.client
.ClusterConnection
;
37 import org
.apache
.hadoop
.hbase
.client
.Connection
;
38 import org
.apache
.hadoop
.hbase
.client
.ConnectionFactory
;
39 import org
.apache
.hadoop
.hbase
.client
.Delete
;
40 import org
.apache
.hadoop
.hbase
.client
.Get
;
41 import org
.apache
.hadoop
.hbase
.client
.Put
;
42 import org
.apache
.hadoop
.hbase
.client
.RegionLocator
;
43 import org
.apache
.hadoop
.hbase
.client
.Result
;
44 import org
.apache
.hadoop
.hbase
.client
.Table
;
45 import org
.apache
.hadoop
.hbase
.client
.replication
.ReplicationAdmin
;
46 import org
.apache
.hadoop
.hbase
.coprocessor
.CoprocessorHost
;
47 import org
.apache
.hadoop
.hbase
.io
.hfile
.TestHFile
;
48 import org
.apache
.hadoop
.hbase
.master
.AssignmentManager
;
49 import org
.apache
.hadoop
.hbase
.master
.RegionState
;
50 import org
.apache
.hadoop
.hbase
.master
.RegionStates
;
51 import org
.apache
.hadoop
.hbase
.master
.procedure
.MasterProcedureEnv
;
52 import org
.apache
.hadoop
.hbase
.master
.procedure
.SplitTableRegionProcedure
;
53 import org
.apache
.hadoop
.hbase
.regionserver
.HRegion
;
54 import org
.apache
.hadoop
.hbase
.regionserver
.HRegionServer
;
55 import org
.apache
.hadoop
.hbase
.regionserver
.TestEndToEndSplitTransaction
;
56 import org
.apache
.hadoop
.hbase
.replication
.ReplicationFactory
;
57 import org
.apache
.hadoop
.hbase
.replication
.ReplicationPeerConfig
;
58 import org
.apache
.hadoop
.hbase
.replication
.ReplicationQueues
;
59 import org
.apache
.hadoop
.hbase
.replication
.ReplicationQueuesArguments
;
60 import org
.apache
.hadoop
.hbase
.testclassification
.LargeTests
;
61 import org
.apache
.hadoop
.hbase
.testclassification
.MiscTests
;
62 import org
.apache
.hadoop
.hbase
.util
.hbck
.HFileCorruptionChecker
;
63 import org
.apache
.hadoop
.hbase
.util
.hbck
.HbckTestingUtil
;
64 import org
.apache
.hadoop
.hbase
.zookeeper
.ZooKeeperWatcher
;
65 import org
.junit
.AfterClass
;
66 import org
.junit
.Assert
;
67 import org
.junit
.Before
;
68 import org
.junit
.BeforeClass
;
69 import org
.junit
.Ignore
;
70 import org
.junit
.Rule
;
71 import org
.junit
.Test
;
72 import org
.junit
.experimental
.categories
.Category
;
73 import org
.junit
.rules
.TestName
;
75 import java
.io
.IOException
;
76 import java
.util
.ArrayList
;
77 import java
.util
.LinkedList
;
78 import java
.util
.List
;
79 import java
.util
.HashMap
;
81 import java
.util
.concurrent
.Callable
;
82 import java
.util
.concurrent
.CountDownLatch
;
83 import java
.util
.concurrent
.ExecutorService
;
84 import java
.util
.concurrent
.Executors
;
85 import java
.util
.concurrent
.Future
;
86 import java
.util
.concurrent
.ScheduledThreadPoolExecutor
;
87 import java
.util
.concurrent
.SynchronousQueue
;
88 import java
.util
.concurrent
.ThreadPoolExecutor
;
89 import java
.util
.concurrent
.TimeUnit
;
90 import java
.util
.concurrent
.atomic
.AtomicBoolean
;
92 import static org
.apache
.hadoop
.hbase
.util
.hbck
.HbckTestingUtil
.*;
93 import static org
.junit
.Assert
.*;
95 @Category({MiscTests
.class, LargeTests
.class})
96 public class TestHBaseFsckOneRS
extends BaseTestHBaseFsck
{
98 public TestName name
= new TestName();
101 public static void setUpBeforeClass() throws Exception
{
102 TEST_UTIL
.getConfiguration().set(CoprocessorHost
.MASTER_COPROCESSOR_CONF_KEY
,
103 MasterSyncObserver
.class.getName());
105 conf
.setInt("hbase.regionserver.handler.count", 2);
106 conf
.setInt("hbase.regionserver.metahandler.count", 30);
108 conf
.setInt("hbase.htable.threads.max", POOL_SIZE
);
109 conf
.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE
);
110 conf
.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT
);
111 conf
.setInt(HConstants
.HBASE_RPC_TIMEOUT_KEY
, 8 * REGION_ONLINE_TIMEOUT
);
112 TEST_UTIL
.startMiniCluster(1);
114 tableExecutorService
= new ThreadPoolExecutor(1, POOL_SIZE
, 60, TimeUnit
.SECONDS
,
115 new SynchronousQueue
<>(), Threads
.newDaemonThreadFactory("testhbck"));
117 hbfsckExecutorService
= new ScheduledThreadPoolExecutor(POOL_SIZE
);
119 AssignmentManager assignmentManager
=
120 TEST_UTIL
.getHBaseCluster().getMaster().getAssignmentManager();
121 regionStates
= assignmentManager
.getRegionStates();
123 connection
= (ClusterConnection
) TEST_UTIL
.getConnection();
125 admin
= connection
.getAdmin();
126 admin
.setBalancerRunning(false, true);
128 TEST_UTIL
.waitUntilAllRegionsAssigned(TableName
.META_TABLE_NAME
);
129 TEST_UTIL
.waitUntilAllRegionsAssigned(TableName
.NAMESPACE_TABLE_NAME
);
133 public static void tearDownAfterClass() throws Exception
{
134 tableExecutorService
.shutdown();
135 hbfsckExecutorService
.shutdown();
137 TEST_UTIL
.shutdownMiniCluster();
141 public void setUp() {
142 EnvironmentEdgeManager
.reset();
147 * This creates a clean table and confirms that the table is clean.
149 @Test(timeout
=180000)
150 public void testHBaseFsckClean() throws Exception
{
151 assertNoErrors(doFsck(conf
, false));
152 TableName table
= TableName
.valueOf("tableClean");
154 HBaseFsck hbck
= doFsck(conf
, false);
155 assertNoErrors(hbck
);
158 assertEquals(ROWKEYS
.length
, countRows());
160 // We created 1 table, should be fine
161 hbck
= doFsck(conf
, false);
162 assertNoErrors(hbck
);
163 assertEquals(0, hbck
.getOverlapGroups(table
).size());
164 assertEquals(ROWKEYS
.length
, countRows());
171 * Test thread pooling in the case where there are more regions than threads
173 @Test (timeout
=180000)
174 public void testHbckThreadpooling() throws Exception
{
175 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
177 // Create table with 4 regions
178 setupTable(tableName
);
180 // limit number of threads to 1.
181 Configuration newconf
= new Configuration(conf
);
182 newconf
.setInt("hbasefsck.numthreads", 1);
183 assertNoErrors(doFsck(newconf
, false));
185 // We should pass without triggering a RejectedExecutionException
187 cleanupTable(tableName
);
191 @Test (timeout
=180000)
192 public void testTableWithNoRegions() throws Exception
{
193 // We might end up with empty regions in a table
194 // see also testNoHdfsTable()
195 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
197 // create table with one region
198 HTableDescriptor desc
= new HTableDescriptor(tableName
);
199 HColumnDescriptor hcd
= new HColumnDescriptor(Bytes
.toString(FAM
));
200 desc
.addFamily(hcd
); // If a table has no CF's it doesn't get checked
201 createTable(TEST_UTIL
, desc
, null);
202 tbl
= connection
.getTable(tableName
, tableExecutorService
);
204 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
205 deleteRegion(conf
, tbl
.getTableDescriptor(), HConstants
.EMPTY_START_ROW
,
206 HConstants
.EMPTY_END_ROW
, false, false, true);
208 HBaseFsck hbck
= doFsck(conf
, false);
209 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
210 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
});
217 // check that hole fixed
218 assertNoErrors(doFsck(conf
, false));
220 cleanupTable(tableName
);
224 @Test (timeout
=180000)
225 public void testHbckFixOrphanTable() throws Exception
{
226 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
227 FileSystem fs
= null;
228 Path tableinfo
= null;
230 setupTable(tableName
);
232 Path hbaseTableDir
= FSUtils
.getTableDir(
233 FSUtils
.getRootDir(conf
), tableName
);
234 fs
= hbaseTableDir
.getFileSystem(conf
);
235 FileStatus status
= FSTableDescriptors
.getTableInfoPath(fs
, hbaseTableDir
);
236 tableinfo
= status
.getPath();
237 fs
.rename(tableinfo
, new Path("/.tableinfo"));
239 //to report error if .tableinfo is missing.
240 HBaseFsck hbck
= doFsck(conf
, false);
241 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
242 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NO_TABLEINFO_FILE
});
244 // fix OrphanTable with default .tableinfo (htd not yet cached on master)
245 hbck
= doFsck(conf
, true);
246 assertNoErrors(hbck
);
248 status
= FSTableDescriptors
.getTableInfoPath(fs
, hbaseTableDir
);
249 assertNotNull(status
);
251 HTableDescriptor htd
= admin
.getTableDescriptor(tableName
);
252 htd
.setValue("NOT_DEFAULT", "true");
253 admin
.disableTable(tableName
);
254 admin
.modifyTable(tableName
, htd
);
255 admin
.enableTable(tableName
);
256 fs
.delete(status
.getPath(), true);
258 // fix OrphanTable with cache
259 htd
= admin
.getTableDescriptor(tableName
); // warms up cached htd on master
260 hbck
= doFsck(conf
, true);
261 assertNoErrors(hbck
);
262 status
= FSTableDescriptors
.getTableInfoPath(fs
, hbaseTableDir
);
263 assertNotNull(status
);
264 htd
= admin
.getTableDescriptor(tableName
);
265 assertEquals(htd
.getValue("NOT_DEFAULT"), "true");
268 fs
.rename(new Path("/.tableinfo"), tableinfo
);
270 cleanupTable(tableName
);
274 @Test (timeout
=180000)
275 public void testReadOnlyProperty() throws Exception
{
276 HBaseFsck hbck
= doFsck(conf
, false);
277 Assert
.assertEquals("shouldIgnorePreCheckPermission", true,
278 hbck
.shouldIgnorePreCheckPermission());
280 hbck
= doFsck(conf
, true);
281 Assert
.assertEquals("shouldIgnorePreCheckPermission", false,
282 hbck
.shouldIgnorePreCheckPermission());
284 hbck
= doFsck(conf
, true);
285 hbck
.setIgnorePreCheckPermission(true);
286 Assert
.assertEquals("shouldIgnorePreCheckPermission", true,
287 hbck
.shouldIgnorePreCheckPermission());
291 * This creates and fixes a bad table where a region is completely contained
292 * by another region, and there is a hole (sort of like a bad split)
294 @Test (timeout
=180000)
295 public void testOverlapAndOrphan() throws Exception
{
296 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
298 setupTable(tableName
);
299 assertEquals(ROWKEYS
.length
, countRows());
301 // Mess it up by creating an overlap in the metadata
302 admin
.disableTable(tableName
);
303 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("A"), Bytes
.toBytes("B"), true,
304 true, false, true, HRegionInfo
.DEFAULT_REPLICA_ID
);
305 admin
.enableTable(tableName
);
307 HRegionInfo hriOverlap
=
308 createRegion(tbl
.getTableDescriptor(), Bytes
.toBytes("A2"), Bytes
.toBytes("B"));
309 TEST_UTIL
.assignRegion(hriOverlap
);
311 ServerName server
= regionStates
.getRegionServerOfRegion(hriOverlap
);
312 TEST_UTIL
.assertRegionOnServer(hriOverlap
, server
, REGION_ONLINE_TIMEOUT
);
314 HBaseFsck hbck
= doFsck(conf
, false);
316 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
317 HBaseFsck
.ErrorReporter
.ERROR_CODE
.ORPHAN_HDFS_REGION
,
318 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
319 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
324 // verify that overlaps are fixed
325 HBaseFsck hbck2
= doFsck(conf
,false);
326 assertNoErrors(hbck2
);
327 assertEquals(0, hbck2
.getOverlapGroups(tableName
).size());
328 assertEquals(ROWKEYS
.length
, countRows());
330 cleanupTable(tableName
);
335 * This creates and fixes a bad table where a region overlaps two regions --
336 * a start key contained in another region and its end key is contained in
337 * yet another region.
339 @Test (timeout
=180000)
340 public void testCoveredStartKey() throws Exception
{
341 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
343 setupTable(tableName
);
344 assertEquals(ROWKEYS
.length
, countRows());
346 // Mess it up by creating an overlap in the metadata
347 HRegionInfo hriOverlap
=
348 createRegion(tbl
.getTableDescriptor(), Bytes
.toBytes("A2"), Bytes
.toBytes("B2"));
349 TEST_UTIL
.assignRegion(hriOverlap
);
351 ServerName server
= regionStates
.getRegionServerOfRegion(hriOverlap
);
352 TEST_UTIL
.assertRegionOnServer(hriOverlap
, server
, REGION_ONLINE_TIMEOUT
);
354 HBaseFsck hbck
= doFsck(conf
, false);
355 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
356 HBaseFsck
.ErrorReporter
.ERROR_CODE
.OVERLAP_IN_REGION_CHAIN
,
357 HBaseFsck
.ErrorReporter
.ERROR_CODE
.OVERLAP_IN_REGION_CHAIN
});
358 assertEquals(3, hbck
.getOverlapGroups(tableName
).size());
359 assertEquals(ROWKEYS
.length
, countRows());
364 // verify that overlaps are fixed
365 HBaseFsck hbck2
= doFsck(conf
, false);
366 assertErrors(hbck2
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[0]);
367 assertEquals(0, hbck2
.getOverlapGroups(tableName
).size());
368 assertEquals(ROWKEYS
.length
, countRows());
370 cleanupTable(tableName
);
375 * This creates and fixes a bad table with a missing region -- hole in meta
376 * and data missing in the fs.
378 @Test (timeout
=180000)
379 public void testRegionHole() throws Exception
{
380 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
382 setupTable(tableName
);
383 assertEquals(ROWKEYS
.length
, countRows());
385 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
386 admin
.disableTable(tableName
);
387 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"), Bytes
.toBytes("C"), true,
389 admin
.enableTable(tableName
);
391 HBaseFsck hbck
= doFsck(conf
, false);
392 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
393 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
394 // holes are separate from overlap groups
395 assertEquals(0, hbck
.getOverlapGroups(tableName
).size());
400 // check that hole fixed
401 assertNoErrors(doFsck(conf
,false));
402 assertEquals(ROWKEYS
.length
- 2, countRows()); // lost a region so lost a row
404 cleanupTable(tableName
);
409 * The region is not deployed when the table is disabled.
411 @Test (timeout
=180000)
412 public void testRegionShouldNotBeDeployed() throws Exception
{
413 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
415 LOG
.info("Starting testRegionShouldNotBeDeployed.");
416 MiniHBaseCluster cluster
= TEST_UTIL
.getHBaseCluster();
417 assertTrue(cluster
.waitForActiveAndReadyMaster());
420 byte[][] SPLIT_KEYS
= new byte[][] { new byte[0], Bytes
.toBytes("aaa"),
421 Bytes
.toBytes("bbb"), Bytes
.toBytes("ccc"), Bytes
.toBytes("ddd") };
422 HTableDescriptor htdDisabled
= new HTableDescriptor(tableName
);
423 htdDisabled
.addFamily(new HColumnDescriptor(FAM
));
425 // Write the .tableinfo
426 FSTableDescriptors fstd
= new FSTableDescriptors(conf
);
427 fstd
.createTableDescriptor(htdDisabled
);
428 List
<HRegionInfo
> disabledRegions
=
429 TEST_UTIL
.createMultiRegionsInMeta(conf
, htdDisabled
, SPLIT_KEYS
);
431 // Let's just assign everything to first RS
432 HRegionServer hrs
= cluster
.getRegionServer(0);
434 // Create region files.
435 admin
.disableTable(tableName
);
436 admin
.enableTable(tableName
);
438 // Disable the table and close its regions
439 admin
.disableTable(tableName
);
440 HRegionInfo region
= disabledRegions
.remove(0);
441 byte[] regionName
= region
.getRegionName();
443 // The region should not be assigned currently
444 assertTrue(cluster
.getServerWith(regionName
) == -1);
446 // Directly open a region on a region server.
447 // If going through AM/ZK, the region won't be open.
448 // Even it is opened, AM will close it which causes
449 // flakiness of this test.
450 HRegion r
= HRegion
.openHRegion(
451 region
, htdDisabled
, hrs
.getWAL(region
), conf
);
452 hrs
.addToOnlineRegions(r
);
454 HBaseFsck hbck
= doFsck(conf
, false);
455 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
456 HBaseFsck
.ErrorReporter
.ERROR_CODE
.SHOULD_NOT_BE_DEPLOYED
});
462 assertNoErrors(doFsck(conf
, false));
464 admin
.enableTable(tableName
);
465 cleanupTable(tableName
);
470 * This test makes sure that parallel instances of Hbck is disabled.
474 @Test(timeout
=180000)
475 public void testParallelHbck() throws Exception
{
476 final ExecutorService service
;
477 final Future
<HBaseFsck
> hbck1
,hbck2
;
479 class RunHbck
implements Callable
<HBaseFsck
> {
482 public HBaseFsck
call(){
483 Configuration c
= new Configuration(conf
);
484 c
.setInt("hbase.hbck.lockfile.attempts", 1);
485 // HBASE-13574 found that in HADOOP-2.6 and later, the create file would internally retry.
486 // To avoid flakiness of the test, set low max wait time.
487 c
.setInt("hbase.hbck.lockfile.maxwaittime", 3);
489 return doFsck(c
, true); // Exclusive hbck only when fixing
490 } catch(Exception e
){
491 if (e
.getMessage().contains("Duplicate hbck")) {
495 // If we reach here, then an exception was caught
500 service
= Executors
.newFixedThreadPool(2);
501 hbck1
= service
.submit(new RunHbck());
502 hbck2
= service
.submit(new RunHbck());
504 //wait for 15 seconds, for both hbck calls finish
505 service
.awaitTermination(15, TimeUnit
.SECONDS
);
506 HBaseFsck h1
= hbck1
.get();
507 HBaseFsck h2
= hbck2
.get();
508 // Make sure only one of the calls was successful
509 assert(h1
== null || h2
== null);
511 assert(h1
.getRetCode() >= 0);
514 assert(h2
.getRetCode() >= 0);
519 * This test makes sure that with enough retries both parallel instances
520 * of hbck will be completed successfully.
524 @Test (timeout
=180000)
525 public void testParallelWithRetriesHbck() throws Exception
{
526 final ExecutorService service
;
527 final Future
<HBaseFsck
> hbck1
,hbck2
;
529 // With the ExponentialBackoffPolicyWithLimit (starting with 200 milliseconds sleep time, and
530 // max sleep time of 5 seconds), we can retry around 15 times within 80 seconds before bail out.
532 // Note: the reason to use 80 seconds is that in HADOOP-2.6 and later, the create file would
533 // retry up to HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds). See HBASE-13574 for more
535 final int timeoutInSeconds
= 80;
536 final int sleepIntervalInMilliseconds
= 200;
537 final int maxSleepTimeInMilliseconds
= 6000;
538 final int maxRetryAttempts
= 15;
540 class RunHbck
implements Callable
<HBaseFsck
>{
543 public HBaseFsck
call() throws Exception
{
544 // Increase retry attempts to make sure the non-active hbck doesn't get starved
545 Configuration c
= new Configuration(conf
);
546 c
.setInt("hbase.hbck.lockfile.maxwaittime", timeoutInSeconds
);
547 c
.setInt("hbase.hbck.lockfile.attempt.sleep.interval", sleepIntervalInMilliseconds
);
548 c
.setInt("hbase.hbck.lockfile.attempt.maxsleeptime", maxSleepTimeInMilliseconds
);
549 c
.setInt("hbase.hbck.lockfile.attempts", maxRetryAttempts
);
550 return doFsck(c
, false);
554 service
= Executors
.newFixedThreadPool(2);
555 hbck1
= service
.submit(new RunHbck());
556 hbck2
= service
.submit(new RunHbck());
558 //wait for some time, for both hbck calls finish
559 service
.awaitTermination(timeoutInSeconds
* 2, TimeUnit
.SECONDS
);
560 HBaseFsck h1
= hbck1
.get();
561 HBaseFsck h2
= hbck2
.get();
562 // Both should be successful
565 assert(h1
.getRetCode() >= 0);
566 assert(h2
.getRetCode() >= 0);
570 @Test (timeout
= 180000)
571 public void testRegionBoundariesCheck() throws Exception
{
572 HBaseFsck hbck
= doFsck(conf
, false);
573 assertNoErrors(hbck
); // no errors
575 hbck
.connect(); // need connection to have access to META
576 hbck
.checkRegionBoundaries();
577 } catch (IllegalArgumentException e
) {
578 if (e
.getMessage().endsWith("not a valid DFS filename.")) {
579 fail("Table directory path is not valid." + e
.getMessage());
587 * test region boundaries and make sure store file had been created.
590 @Test(timeout
= 180000)
591 public void testRegionBoundariesCheckWithFlushTable() throws Exception
{
592 HBaseFsck hbck
= doFsck(conf
, false);
593 assertNoErrors(hbck
); // no errors
594 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
596 setupTable(tableName
);
597 admin
.flush(tableName
);
598 hbck
.connect(); // need connection to have access to META
599 hbck
.checkRegionBoundaries();
600 assertNoErrors(hbck
); // no errors
601 } catch (IllegalArgumentException e
) {
602 if (e
.getMessage().endsWith("not a valid DFS filename.")) {
603 fail("Table directory path is not valid." + e
.getMessage());
610 @Test (timeout
=180000)
611 public void testHbckAfterRegionMerge() throws Exception
{
612 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
615 // disable CatalogJanitor
616 TEST_UTIL
.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
617 setupTable(tableName
);
618 assertEquals(ROWKEYS
.length
, countRows());
620 try(RegionLocator rl
= connection
.getRegionLocator(tbl
.getName())) {
621 // make sure data in regions, if in wal only there is no data loss
622 admin
.flush(tableName
);
623 HRegionInfo region1
= rl
.getRegionLocation(Bytes
.toBytes("A")).getRegionInfo();
624 HRegionInfo region2
= rl
.getRegionLocation(Bytes
.toBytes("B")).getRegionInfo();
626 int regionCountBeforeMerge
= rl
.getAllRegionLocations().size();
628 assertNotEquals(region1
, region2
);
631 admin
.mergeRegionsAsync(
632 region1
.getEncodedNameAsBytes(), region2
.getEncodedNameAsBytes(), false);
634 // wait until region merged
635 long timeout
= System
.currentTimeMillis() + 30 * 1000;
637 if (rl
.getAllRegionLocations().size() < regionCountBeforeMerge
) {
639 } else if (System
.currentTimeMillis() > timeout
) {
640 fail("Time out waiting on region " + region1
.getEncodedName() + " and " + region2
641 .getEncodedName() + " be merged");
646 assertEquals(ROWKEYS
.length
, countRows());
648 HBaseFsck hbck
= doFsck(conf
, false);
649 assertNoErrors(hbck
); // no errors
653 TEST_UTIL
.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
654 cleanupTable(tableName
);
655 IOUtils
.closeQuietly(meta
);
659 * This creates entries in hbase:meta with no hdfs data. This should cleanly
662 @Test (timeout
=180000)
663 public void testNoHdfsTable() throws Exception
{
664 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
665 setupTable(tableName
);
666 assertEquals(ROWKEYS
.length
, countRows());
668 // make sure data in regions, if in wal only there is no data loss
669 admin
.flush(tableName
);
671 // Mess it up by deleting hdfs dirs
672 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes(""),
673 Bytes
.toBytes("A"), false, false, true); // don't rm meta
674 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("A"),
675 Bytes
.toBytes("B"), false, false, true); // don't rm meta
676 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"),
677 Bytes
.toBytes("C"), false, false, true); // don't rm meta
678 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("C"),
679 Bytes
.toBytes(""), false, false, true); // don't rm meta
681 // also remove the table directory in hdfs
682 deleteTableDir(tableName
);
684 HBaseFsck hbck
= doFsck(conf
, false);
685 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
686 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
,
687 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
,
688 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
,
689 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
,
690 HBaseFsck
.ErrorReporter
.ERROR_CODE
.ORPHAN_TABLE_STATE
, });
691 // holes are separate from overlap groups
692 assertEquals(0, hbck
.getOverlapGroups(tableName
).size());
695 doFsck(conf
, true); // detect dangling regions and remove those
697 // check that hole fixed
698 assertNoErrors(doFsck(conf
,false));
699 assertFalse("Table " + tableName
+ " should have been deleted", admin
.tableExists(tableName
));
703 * when the hbase.version file missing, It is fix the fault.
705 @Test (timeout
=180000)
706 public void testNoVersionFile() throws Exception
{
707 // delete the hbase.version file
708 Path rootDir
= FSUtils
.getRootDir(conf
);
709 FileSystem fs
= rootDir
.getFileSystem(conf
);
710 Path versionFile
= new Path(rootDir
, HConstants
.VERSION_FILE_NAME
);
711 fs
.delete(versionFile
, true);
714 HBaseFsck hbck
= doFsck(conf
, false);
715 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
716 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NO_VERSION_FILE
});
717 // fix hbase.version missing
720 // no version file fixed
721 assertNoErrors(doFsck(conf
, false));
724 @Test (timeout
=180000)
725 public void testNoTableState() throws Exception
{
726 // delete the hbase.version file
727 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
729 setupTable(tableName
);
730 // make sure data in regions, if in wal only there is no data loss
731 admin
.flush(tableName
);
733 MetaTableAccessor
.deleteTableState(TEST_UTIL
.getConnection(), tableName
);
736 HBaseFsck hbck
= doFsck(conf
, false);
737 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
738 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NO_TABLE_STATE
});
739 // fix table state missing
742 assertNoErrors(doFsck(conf
, false));
743 assertTrue(TEST_UTIL
.getAdmin().isTableEnabled(tableName
));
745 cleanupTable(tableName
);
750 * This creates two tables and mess both of them and fix them one by one
752 @Test (timeout
=180000)
753 public void testFixByTable() throws Exception
{
754 final TableName tableName1
= TableName
.valueOf(name
.getMethodName() + "1");
755 final TableName tableName2
= TableName
.valueOf(name
.getMethodName() + "2");
757 setupTable(tableName1
);
758 // make sure data in regions, if in wal only there is no data loss
759 admin
.flush(tableName1
);
760 // Mess them up by leaving a hole in the hdfs data
761 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"),
762 Bytes
.toBytes("C"), false, false, true); // don't rm meta
764 setupTable(tableName2
);
765 // make sure data in regions, if in wal only there is no data loss
766 admin
.flush(tableName2
);
767 // Mess them up by leaving a hole in the hdfs data
768 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"), Bytes
.toBytes("C"), false,
769 false, true); // don't rm meta
771 HBaseFsck hbck
= doFsck(conf
, false);
772 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
773 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
,
774 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
});
776 // fix hole in table 1
777 doFsck(conf
, true, tableName1
);
778 // check that hole in table 1 fixed
779 assertNoErrors(doFsck(conf
, false, tableName1
));
780 // check that hole in table 2 still there
781 assertErrors(doFsck(conf
, false, tableName2
), new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
782 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
});
784 // fix hole in table 2
785 doFsck(conf
, true, tableName2
);
786 // check that hole in both tables fixed
787 assertNoErrors(doFsck(conf
, false));
788 assertEquals(ROWKEYS
.length
- 2, countRows());
790 cleanupTable(tableName1
);
791 cleanupTable(tableName2
);
795 * A split parent in meta, in hdfs, and not deployed
797 @Test (timeout
=180000)
798 public void testLingeringSplitParent() throws Exception
{
799 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
802 setupTable(tableName
);
803 assertEquals(ROWKEYS
.length
, countRows());
805 // make sure data in regions, if in wal only there is no data loss
806 admin
.flush(tableName
);
808 HRegionLocation location
;
809 try(RegionLocator rl
= connection
.getRegionLocator(tbl
.getName())) {
810 location
= rl
.getRegionLocation(Bytes
.toBytes("B"));
813 // Delete one region from meta, but not hdfs, unassign it.
814 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"),
815 Bytes
.toBytes("C"), true, true, false);
817 // Create a new meta entry to fake it as a split parent.
818 meta
= connection
.getTable(TableName
.META_TABLE_NAME
, tableExecutorService
);
819 HRegionInfo hri
= location
.getRegionInfo();
821 HRegionInfo a
= new HRegionInfo(tbl
.getName(),
822 Bytes
.toBytes("B"), Bytes
.toBytes("BM"));
823 HRegionInfo b
= new HRegionInfo(tbl
.getName(),
824 Bytes
.toBytes("BM"), Bytes
.toBytes("C"));
826 hri
.setOffline(true);
829 MetaTableAccessor
.addRegionToMeta(meta
, hri
, a
, b
);
831 admin
.flush(TableName
.META_TABLE_NAME
);
833 HBaseFsck hbck
= doFsck(conf
, false);
834 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
835 HBaseFsck
.ErrorReporter
.ERROR_CODE
.LINGERING_SPLIT_PARENT
,
836 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
838 // regular repair cannot fix lingering split parent
839 hbck
= doFsck(conf
, true);
840 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
841 HBaseFsck
.ErrorReporter
.ERROR_CODE
.LINGERING_SPLIT_PARENT
,
842 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
843 assertFalse(hbck
.shouldRerun());
844 hbck
= doFsck(conf
, false);
845 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
846 HBaseFsck
.ErrorReporter
.ERROR_CODE
.LINGERING_SPLIT_PARENT
,
847 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
849 // fix lingering split parent
850 hbck
= new HBaseFsck(conf
, hbfsckExecutorService
);
852 HBaseFsck
.setDisplayFullReport(); // i.e. -details
854 hbck
.setFixSplitParents(true);
856 assertTrue(hbck
.shouldRerun());
859 Get get
= new Get(hri
.getRegionName());
860 Result result
= meta
.get(get
);
861 assertTrue(result
.getColumnCells(HConstants
.CATALOG_FAMILY
,
862 HConstants
.SPLITA_QUALIFIER
).isEmpty());
863 assertTrue(result
.getColumnCells(HConstants
.CATALOG_FAMILY
,
864 HConstants
.SPLITB_QUALIFIER
).isEmpty());
865 admin
.flush(TableName
.META_TABLE_NAME
);
870 // check that all are fixed
871 assertNoErrors(doFsck(conf
, false));
872 assertEquals(ROWKEYS
.length
, countRows());
874 cleanupTable(tableName
);
875 IOUtils
.closeQuietly(meta
);
880 * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
881 * valid cases where the daughters are there.
883 @Test (timeout
=180000)
884 public void testValidLingeringSplitParent() throws Exception
{
885 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
888 setupTable(tableName
);
889 assertEquals(ROWKEYS
.length
, countRows());
891 // make sure data in regions, if in wal only there is no data loss
892 admin
.flush(tableName
);
894 try(RegionLocator rl
= connection
.getRegionLocator(tbl
.getName())) {
895 HRegionLocation location
= rl
.getRegionLocation(Bytes
.toBytes("B"));
897 meta
= connection
.getTable(TableName
.META_TABLE_NAME
, tableExecutorService
);
898 HRegionInfo hri
= location
.getRegionInfo();
900 // do a regular split
901 byte[] regionName
= location
.getRegionInfo().getRegionName();
902 admin
.splitRegion(location
.getRegionInfo().getRegionName(), Bytes
.toBytes("BM"));
903 TestEndToEndSplitTransaction
.blockUntilRegionSplit(conf
, 60000, regionName
, true);
905 // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
906 // for some time until children references are deleted. HBCK erroneously sees this as
907 // overlapping regions
908 HBaseFsck hbck
= doFsck(conf
, true, true, false, false, false, true, true, true, true,
909 false, false, false, null);
910 // no LINGERING_SPLIT_PARENT reported
911 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {}); //no LINGERING_SPLIT_PARENT reported
913 // assert that the split hbase:meta entry is still there.
914 Get get
= new Get(hri
.getRegionName());
915 Result result
= meta
.get(get
);
916 assertNotNull(result
);
917 assertNotNull(MetaTableAccessor
.getHRegionInfo(result
));
919 assertEquals(ROWKEYS
.length
, countRows());
921 // assert that we still have the split regions
922 //SPLITS + 1 is # regions pre-split.
923 assertEquals(rl
.getStartKeys().length
, SPLITS
.length
+ 1 + 1);
924 assertNoErrors(doFsck(conf
, false));
927 cleanupTable(tableName
);
928 IOUtils
.closeQuietly(meta
);
933 * Split crashed after write to hbase:meta finished for the parent region, but
934 * failed to write daughters (pre HBASE-7721 codebase)
937 public void testSplitDaughtersNotInMeta() throws Exception
{
938 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
939 Table meta
= connection
.getTable(TableName
.META_TABLE_NAME
, tableExecutorService
);
941 setupTable(tableName
);
942 assertEquals(ROWKEYS
.length
, countRows());
944 // make sure data in regions, if in wal only there is no data loss
945 admin
.flush(tableName
);
947 try(RegionLocator rl
= connection
.getRegionLocator(tbl
.getName())) {
948 HRegionLocation location
= rl
.getRegionLocation(Bytes
.toBytes("B"));
950 HRegionInfo hri
= location
.getRegionInfo();
952 // Disable CatalogJanitor to prevent it from cleaning up the parent region
954 admin
.enableCatalogJanitor(false);
956 // do a regular split
957 byte[] regionName
= location
.getRegionInfo().getRegionName();
958 admin
.splitRegion(location
.getRegionInfo().getRegionName(), Bytes
.toBytes("BM"));
959 TestEndToEndSplitTransaction
.blockUntilRegionSplit(conf
, 60000, regionName
, true);
961 PairOfSameType
<HRegionInfo
> daughters
= MetaTableAccessor
.getDaughterRegions(
962 meta
.get(new Get(regionName
)));
964 // Delete daughter regions from meta, but not hdfs, unassign it.
967 rl
.getRegionLocation(daughters
.getFirst().getStartKey()).getServerName();
968 ServerName secondSN
=
969 rl
.getRegionLocation(daughters
.getSecond().getStartKey()).getServerName();
971 undeployRegion(connection
, firstSN
, daughters
.getFirst());
972 undeployRegion(connection
, secondSN
, daughters
.getSecond());
974 List
<Delete
> deletes
= new ArrayList
<>(2);
975 deletes
.add(new Delete(daughters
.getFirst().getRegionName()));
976 deletes
.add(new Delete(daughters
.getSecond().getRegionName()));
977 meta
.delete(deletes
);
979 // Remove daughters from regionStates
980 RegionStates regionStates
= TEST_UTIL
.getMiniHBaseCluster().getMaster().
981 getAssignmentManager().getRegionStates();
982 regionStates
.deleteRegion(daughters
.getFirst());
983 regionStates
.deleteRegion(daughters
.getSecond());
985 HBaseFsck hbck
= doFsck(conf
, false);
986 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
987 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
988 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
989 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
}); //no LINGERING_SPLIT_PARENT
991 // now fix it. The fix should not revert the region split, but add daughters to META
992 hbck
= doFsck(conf
, true, true, false, false, false, false, false, false, false,
993 false, false, false, null);
994 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
995 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
996 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
997 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
999 // assert that the split hbase:meta entry is still there.
1000 Get get
= new Get(hri
.getRegionName());
1001 Result result
= meta
.get(get
);
1002 assertNotNull(result
);
1003 assertNotNull(MetaTableAccessor
.getHRegionInfo(result
));
1005 assertEquals(ROWKEYS
.length
, countRows());
1007 // assert that we still have the split regions
1008 assertEquals(rl
.getStartKeys().length
, SPLITS
.length
+ 1 + 1); //SPLITS + 1 is # regions
1010 assertNoErrors(doFsck(conf
, false)); //should be fixed by now
1013 admin
.enableCatalogJanitor(true);
1015 cleanupTable(tableName
);
1020 * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
1021 * meta and data missing in the fs.
1023 @Test(timeout
=120000)
1024 public void testMissingFirstRegion() throws Exception
{
1025 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1027 setupTable(tableName
);
1028 assertEquals(ROWKEYS
.length
, countRows());
1030 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1031 admin
.disableTable(tableName
);
1032 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes(""), Bytes
.toBytes("A"), true,
1034 admin
.enableTable(tableName
);
1036 HBaseFsck hbck
= doFsck(conf
, false);
1037 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1038 HBaseFsck
.ErrorReporter
.ERROR_CODE
.FIRST_REGION_STARTKEY_NOT_EMPTY
});
1041 // check that hole fixed
1042 assertNoErrors(doFsck(conf
, false));
1044 cleanupTable(tableName
);
1049 * This creates and fixes a bad table whose first region is still deployed but whose
1050 * region directory has been deleted from the fs.
1052 @Test(timeout
=120000)
1053 public void testRegionDeployedNotInHdfs() throws Exception
{
1054 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1056 setupTable(tableName
);
1057 admin
.flush(tableName
);
1059 // Mess it up by deleting region dir
1060 deleteRegion(conf
, tbl
.getTableDescriptor(),
1061 HConstants
.EMPTY_START_ROW
, Bytes
.toBytes("A"), false,
1064 HBaseFsck hbck
= doFsck(conf
, false);
1065 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1066 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
});
1069 // check that hole fixed
1070 assertNoErrors(doFsck(conf
, false));
1072 cleanupTable(tableName
);
1077 * This creates and fixes a bad table with missing last region -- hole in meta and data missing in the fs.
1080 @Test(timeout
=120000)
1081 public void testMissingLastRegion() throws Exception
{
1082 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1084 setupTable(tableName
);
1085 assertEquals(ROWKEYS
.length
, countRows());
1087 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1088 admin
.disableTable(tableName
);
1089 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("C"), Bytes
.toBytes(""), true,
1091 admin
.enableTable(tableName
);
1093 HBaseFsck hbck
= doFsck(conf
, false);
1094 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1095 HBaseFsck
.ErrorReporter
.ERROR_CODE
.LAST_REGION_ENDKEY_NOT_EMPTY
});
1098 // check that hole fixed
1099 assertNoErrors(doFsck(conf
, false));
1101 cleanupTable(tableName
);
1106 * Test -noHdfsChecking option can detect and fix assignment issues.
1108 @Test (timeout
=180000)
1109 public void testFixAssignmentsAndNoHdfsChecking() throws Exception
{
1110 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1112 setupTable(tableName
);
1113 assertEquals(ROWKEYS
.length
, countRows());
1115 // Mess it up by closing a region
1116 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("A"), Bytes
.toBytes("B"), true,
1117 false, false, false, HRegionInfo
.DEFAULT_REPLICA_ID
);
1119 // verify there is no other errors
1120 HBaseFsck hbck
= doFsck(conf
, false);
1122 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1123 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_DEPLOYED
,
1124 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1126 // verify that noHdfsChecking report the same errors
1127 HBaseFsck fsck
= new HBaseFsck(conf
, hbfsckExecutorService
);
1129 HBaseFsck
.setDisplayFullReport(); // i.e. -details
1131 fsck
.setCheckHdfs(false);
1134 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1135 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_DEPLOYED
,
1136 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1139 // verify that fixAssignments works fine with noHdfsChecking
1140 fsck
= new HBaseFsck(conf
, hbfsckExecutorService
);
1142 HBaseFsck
.setDisplayFullReport(); // i.e. -details
1144 fsck
.setCheckHdfs(false);
1145 fsck
.setFixAssignments(true);
1147 assertTrue(fsck
.shouldRerun());
1149 assertNoErrors(fsck
);
1151 assertEquals(ROWKEYS
.length
, countRows());
1155 cleanupTable(tableName
);
1160 * Test -noHdfsChecking option can detect region is not in meta but deployed.
1161 * However, it can not fix it without checking Hdfs because we need to get
1162 * the region info from Hdfs in this case, then to patch the meta.
1164 @Test (timeout
=180000)
1165 public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception
{
1166 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1168 setupTable(tableName
);
1169 assertEquals(ROWKEYS
.length
, countRows());
1171 // Mess it up by deleting a region from the metadata
1172 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("A"),
1173 Bytes
.toBytes("B"), false, true, false, false, HRegionInfo
.DEFAULT_REPLICA_ID
);
1175 // verify there is no other errors
1176 HBaseFsck hbck
= doFsck(conf
, false);
1178 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1179 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META
,
1180 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1182 // verify that noHdfsChecking report the same errors
1183 HBaseFsck fsck
= new HBaseFsck(conf
, hbfsckExecutorService
);
1185 HBaseFsck
.setDisplayFullReport(); // i.e. -details
1187 fsck
.setCheckHdfs(false);
1190 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1191 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META
,
1192 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1195 // verify that fixMeta doesn't work with noHdfsChecking
1196 fsck
= new HBaseFsck(conf
, hbfsckExecutorService
);
1198 HBaseFsck
.setDisplayFullReport(); // i.e. -details
1200 fsck
.setCheckHdfs(false);
1201 fsck
.setFixAssignments(true);
1202 fsck
.setFixMeta(true);
1204 assertFalse(fsck
.shouldRerun());
1206 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1207 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META
,
1208 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1211 // fix the cluster so other tests won't be impacted
1212 fsck
= doFsck(conf
, true);
1213 assertTrue(fsck
.shouldRerun());
1214 fsck
= doFsck(conf
, true);
1215 assertNoErrors(fsck
);
1217 cleanupTable(tableName
);
1222 * Test -fixHdfsHoles doesn't work with -noHdfsChecking option,
1223 * and -noHdfsChecking can't detect orphan Hdfs region.
1225 @Test (timeout
=180000)
1226 public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception
{
1227 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1229 setupTable(tableName
);
1230 assertEquals(ROWKEYS
.length
, countRows());
1232 // Mess it up by creating an overlap in the metadata
1233 admin
.disableTable(tableName
);
1234 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("A"), Bytes
.toBytes("B"), true,
1235 true, false, true, HRegionInfo
.DEFAULT_REPLICA_ID
);
1236 admin
.enableTable(tableName
);
1238 HRegionInfo hriOverlap
=
1239 createRegion(tbl
.getTableDescriptor(), Bytes
.toBytes("A2"), Bytes
.toBytes("B"));
1240 TEST_UTIL
.assignRegion(hriOverlap
);
1242 ServerName server
= regionStates
.getRegionServerOfRegion(hriOverlap
);
1243 TEST_UTIL
.assertRegionOnServer(hriOverlap
, server
, REGION_ONLINE_TIMEOUT
);
1245 HBaseFsck hbck
= doFsck(conf
, false);
1246 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1247 HBaseFsck
.ErrorReporter
.ERROR_CODE
.ORPHAN_HDFS_REGION
,
1248 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
1249 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1251 // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
1252 HBaseFsck fsck
= new HBaseFsck(conf
, hbfsckExecutorService
);
1254 HBaseFsck
.setDisplayFullReport(); // i.e. -details
1256 fsck
.setCheckHdfs(false);
1258 assertErrors(fsck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1259 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1262 // verify that fixHdfsHoles doesn't work with noHdfsChecking
1263 fsck
= new HBaseFsck(conf
, hbfsckExecutorService
);
1265 HBaseFsck
.setDisplayFullReport(); // i.e. -details
1267 fsck
.setCheckHdfs(false);
1268 fsck
.setFixHdfsHoles(true);
1269 fsck
.setFixHdfsOverlaps(true);
1270 fsck
.setFixHdfsOrphans(true);
1272 assertFalse(fsck
.shouldRerun());
1273 assertErrors(fsck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1274 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1277 if (admin
.isTableDisabled(tableName
)) {
1278 admin
.enableTable(tableName
);
1280 cleanupTable(tableName
);
1285 * This creates a table and then corrupts an hfile. Hbck should quarantine the file.
1287 @Test(timeout
=180000)
1288 public void testQuarantineCorruptHFile() throws Exception
{
1289 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1291 setupTable(tableName
);
1292 assertEquals(ROWKEYS
.length
, countRows());
1293 admin
.flush(tableName
); // flush is async.
1295 FileSystem fs
= FileSystem
.get(conf
);
1296 Path hfile
= getFlushedHFile(fs
, tableName
);
1298 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
1299 admin
.disableTable(tableName
);
1301 // create new corrupt file called deadbeef (valid hfile name)
1302 Path corrupt
= new Path(hfile
.getParent(), "deadbeef");
1303 TestHFile
.truncateFile(fs
, hfile
, corrupt
);
1304 LOG
.info("Created corrupted file " + corrupt
);
1305 HBaseFsck
.debugLsr(conf
, FSUtils
.getRootDir(conf
));
1307 // we cannot enable here because enable never finished due to the corrupt region.
1308 HBaseFsck res
= HbckTestingUtil
.doHFileQuarantine(conf
, tableName
);
1309 assertEquals(res
.getRetCode(), 0);
1310 HFileCorruptionChecker hfcc
= res
.getHFilecorruptionChecker();
1311 assertEquals(hfcc
.getHFilesChecked(), 5);
1312 assertEquals(hfcc
.getCorrupted().size(), 1);
1313 assertEquals(hfcc
.getFailures().size(), 0);
1314 assertEquals(hfcc
.getQuarantined().size(), 1);
1315 assertEquals(hfcc
.getMissing().size(), 0);
1317 // Its been fixed, verify that we can enable.
1318 admin
.enableTable(tableName
);
1320 cleanupTable(tableName
);
1325 * This creates a table and simulates the race situation where a concurrent compaction or split
1326 * has removed an hfile after the corruption checker learned about it.
1328 @Test(timeout
=180000)
1329 public void testQuarantineMissingHFile() throws Exception
{
1330 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1332 // inject a fault in the hfcc created.
1333 final FileSystem fs
= FileSystem
.get(conf
);
1334 HBaseFsck hbck
= new HBaseFsck(conf
, hbfsckExecutorService
) {
1336 public HFileCorruptionChecker
createHFileCorruptionChecker(boolean sidelineCorruptHFiles
)
1337 throws IOException
{
1338 return new HFileCorruptionChecker(conf
, executor
, sidelineCorruptHFiles
) {
1339 AtomicBoolean attemptedFirstHFile
= new AtomicBoolean(false);
1341 protected void checkHFile(Path p
) throws IOException
{
1342 if (attemptedFirstHFile
.compareAndSet(false, true)) {
1343 assertTrue(fs
.delete(p
, true)); // make sure delete happened.
1345 super.checkHFile(p
);
1350 doQuarantineTest(tableName
, hbck
, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing.
1355 * This creates and fixes a bad table with a region that has startkey == endkey
1357 @Test (timeout
=180000)
1358 public void testDegenerateRegions() throws Exception
{
1359 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1361 setupTable(tableName
);
1362 assertNoErrors(doFsck(conf
, false));
1363 assertEquals(ROWKEYS
.length
, countRows());
1365 // Now let's mess it up, by adding a region with a duplicate startkey
1366 HRegionInfo hriDupe
=
1367 createRegion(tbl
.getTableDescriptor(), Bytes
.toBytes("B"), Bytes
.toBytes("B"));
1368 TEST_UTIL
.assignRegion(hriDupe
);
1370 ServerName server
= regionStates
.getRegionServerOfRegion(hriDupe
);
1371 TEST_UTIL
.assertRegionOnServer(hriDupe
, server
, REGION_ONLINE_TIMEOUT
);
1373 HBaseFsck hbck
= doFsck(conf
,false);
1374 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1375 HBaseFsck
.ErrorReporter
.ERROR_CODE
.DEGENERATE_REGION
,
1376 HBaseFsck
.ErrorReporter
.ERROR_CODE
.DUPE_STARTKEYS
,
1377 HBaseFsck
.ErrorReporter
.ERROR_CODE
.DUPE_STARTKEYS
});
1378 assertEquals(2, hbck
.getOverlapGroups(tableName
).size());
1379 assertEquals(ROWKEYS
.length
, countRows());
1381 // fix the degenerate region.
1384 // check that the degenerate region is gone and no data loss
1385 HBaseFsck hbck2
= doFsck(conf
,false);
1386 assertNoErrors(hbck2
);
1387 assertEquals(0, hbck2
.getOverlapGroups(tableName
).size());
1388 assertEquals(ROWKEYS
.length
, countRows());
1390 cleanupTable(tableName
);
1395 * Test missing REGIONINFO_QUALIFIER in hbase:meta
1397 @Test (timeout
=180000)
1398 public void testMissingRegionInfoQualifier() throws Exception
{
1399 Connection connection
= ConnectionFactory
.createConnection(conf
);
1400 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1402 setupTable(tableName
);
1404 // Mess it up by removing the RegionInfo for one region.
1405 final List
<Delete
> deletes
= new LinkedList
<>();
1406 Table meta
= connection
.getTable(TableName
.META_TABLE_NAME
, hbfsckExecutorService
);
1407 MetaTableAccessor
.fullScanRegions(connection
, new MetaTableAccessor
.Visitor() {
1410 public boolean visit(Result rowResult
) throws IOException
{
1411 HRegionInfo hri
= MetaTableAccessor
.getHRegionInfo(rowResult
);
1412 if (hri
!= null && !hri
.getTable().isSystemTable()) {
1413 Delete delete
= new Delete(rowResult
.getRow());
1414 delete
.addColumn(HConstants
.CATALOG_FAMILY
, HConstants
.REGIONINFO_QUALIFIER
);
1415 deletes
.add(delete
);
1420 meta
.delete(deletes
);
1422 // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
1423 meta
.put(new Put(Bytes
.toBytes(tableName
+ ",,1361911384013.810e28f59a57da91c66"))
1424 .addColumn(HConstants
.CATALOG_FAMILY
, HConstants
.SERVER_QUALIFIER
,
1425 Bytes
.toBytes("node1:60020")));
1426 meta
.put(new Put(Bytes
.toBytes(tableName
+ ",,1361911384013.810e28f59a57da91c66"))
1427 .addColumn(HConstants
.CATALOG_FAMILY
, HConstants
.STARTCODE_QUALIFIER
,
1428 Bytes
.toBytes(1362150791183L)));
1431 HBaseFsck hbck
= doFsck(conf
, false);
1432 assertTrue(hbck
.getErrors().getErrorList().contains(
1433 HBaseFsck
.ErrorReporter
.ERROR_CODE
.EMPTY_META_CELL
));
1435 // fix the empty meta cells
1436 hbck
= doFsck(conf
, true);
1438 // check that the empty meta cells are fixed
1439 assertFalse(hbck
.getErrors().getErrorList().contains(
1440 HBaseFsck
.ErrorReporter
.ERROR_CODE
.EMPTY_META_CELL
));
1442 cleanupTable(tableName
);
1448 * Test pluggable error reporter. It can be plugged in
1449 * from system property or configuration.
1451 @Test (timeout
=180000)
1452 public void testErrorReporter() throws Exception
{
1454 MockErrorReporter
.calledCount
= 0;
1455 doFsck(conf
, false);
1456 assertEquals(MockErrorReporter
.calledCount
, 0);
1458 conf
.set("hbasefsck.errorreporter", MockErrorReporter
.class.getName());
1459 doFsck(conf
, false);
1460 assertTrue(MockErrorReporter
.calledCount
> 20);
1462 conf
.set("hbasefsck.errorreporter",
1463 HBaseFsck
.PrintingErrorReporter
.class.getName());
1464 MockErrorReporter
.calledCount
= 0;
1468 @Test(timeout
=180000)
1469 public void testCheckReplication() throws Exception
{
1471 HBaseFsck hbck
= doFsck(conf
, false);
1472 assertNoErrors(hbck
);
1475 ReplicationAdmin replicationAdmin
= new ReplicationAdmin(conf
);
1476 Assert
.assertEquals(0, replicationAdmin
.getPeersCount());
1477 int zkPort
= conf
.getInt(HConstants
.ZOOKEEPER_CLIENT_PORT
,
1478 HConstants
.DEFAULT_ZOOKEPER_CLIENT_PORT
);
1479 ReplicationPeerConfig rpc
= new ReplicationPeerConfig();
1480 rpc
.setClusterKey("127.0.0.1:" + zkPort
+ ":/hbase");
1481 replicationAdmin
.addPeer("1", rpc
, null);
1482 replicationAdmin
.getPeersCount();
1483 Assert
.assertEquals(1, replicationAdmin
.getPeersCount());
1485 // create replicator
1486 ZooKeeperWatcher zkw
= new ZooKeeperWatcher(conf
, "Test Hbase Fsck", connection
);
1487 ReplicationQueues repQueues
=
1488 ReplicationFactory
.getReplicationQueues(new ReplicationQueuesArguments(conf
, connection
,
1490 repQueues
.init("server1");
1491 // queues for current peer, no errors
1492 repQueues
.addLog("1", "file1");
1493 repQueues
.addLog("1-server2", "file1");
1494 Assert
.assertEquals(2, repQueues
.getAllQueues().size());
1495 hbck
= doFsck(conf
, false);
1496 assertNoErrors(hbck
);
1498 // queues for removed peer
1499 repQueues
.addLog("2", "file1");
1500 repQueues
.addLog("2-server2", "file1");
1501 Assert
.assertEquals(4, repQueues
.getAllQueues().size());
1502 hbck
= doFsck(conf
, false);
1503 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1504 HBaseFsck
.ErrorReporter
.ERROR_CODE
.UNDELETED_REPLICATION_QUEUE
,
1505 HBaseFsck
.ErrorReporter
.ERROR_CODE
.UNDELETED_REPLICATION_QUEUE
});
1508 hbck
= doFsck(conf
, true);
1509 hbck
= doFsck(conf
, false);
1510 assertNoErrors(hbck
);
1511 // ensure only "2" is deleted
1512 Assert
.assertEquals(2, repQueues
.getAllQueues().size());
1513 Assert
.assertNull(repQueues
.getLogsInQueue("2"));
1514 Assert
.assertNull(repQueues
.getLogsInQueue("2-sever2"));
1516 replicationAdmin
.removePeer("1");
1517 repQueues
.removeAllQueues();
1519 replicationAdmin
.close();
1523 * This creates and fixes a bad table with a missing region -- hole in meta
1524 * and data present but .regioninfo missing (an orphan hdfs region) in the fs.
1526 @Test(timeout
=180000)
1527 public void testHDFSRegioninfoMissing() throws Exception
{
1528 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1530 setupTable(tableName
);
1531 assertEquals(ROWKEYS
.length
, countRows());
1533 // Mess it up by leaving a hole in the meta data
1534 admin
.disableTable(tableName
);
1535 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"), Bytes
.toBytes("C"), true,
1536 true, false, true, HRegionInfo
.DEFAULT_REPLICA_ID
);
1537 admin
.enableTable(tableName
);
1539 HBaseFsck hbck
= doFsck(conf
, false);
1541 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1542 HBaseFsck
.ErrorReporter
.ERROR_CODE
.ORPHAN_HDFS_REGION
,
1543 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
1544 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1545 // holes are separate from overlap groups
1546 assertEquals(0, hbck
.getOverlapGroups(tableName
).size());
1551 // check that hole fixed
1552 assertNoErrors(doFsck(conf
, false));
1553 assertEquals(ROWKEYS
.length
, countRows());
1555 cleanupTable(tableName
);
1560 * This creates and fixes a bad table with a region that is missing meta and
1561 * not assigned to a region server.
1563 @Test (timeout
=180000)
1564 public void testNotInMetaOrDeployedHole() throws Exception
{
1565 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1567 setupTable(tableName
);
1568 assertEquals(ROWKEYS
.length
, countRows());
1570 // Mess it up by leaving a hole in the meta data
1571 admin
.disableTable(tableName
);
1572 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"), Bytes
.toBytes("C"), true,
1573 true, false); // don't rm from fs
1574 admin
.enableTable(tableName
);
1576 HBaseFsck hbck
= doFsck(conf
, false);
1578 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1579 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
1580 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1581 // holes are separate from overlap groups
1582 assertEquals(0, hbck
.getOverlapGroups(tableName
).size());
1585 assertErrors(doFsck(conf
, true),
1586 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1587 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
1588 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1590 // check that hole fixed
1591 assertNoErrors(doFsck(conf
, false));
1592 assertEquals(ROWKEYS
.length
, countRows());
1594 cleanupTable(tableName
);
1598 @Test (timeout
=180000)
1599 public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception
{
1600 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1601 MiniHBaseCluster cluster
= TEST_UTIL
.getHBaseCluster();
1603 HTableDescriptor desc
= new HTableDescriptor(tableName
);
1604 desc
.addFamily(new HColumnDescriptor(Bytes
.toBytes("f")));
1605 createTable(TEST_UTIL
, desc
, null);
1607 tbl
= connection
.getTable(desc
.getTableName());
1608 for (int i
= 0; i
< 5; i
++) {
1609 Put p1
= new Put(("r" + i
).getBytes());
1610 p1
.addColumn(Bytes
.toBytes("f"), "q1".getBytes(), "v".getBytes());
1613 admin
.flush(desc
.getTableName());
1614 List
<HRegion
> regions
= cluster
.getRegions(desc
.getTableName());
1615 int serverWith
= cluster
.getServerWith(regions
.get(0).getRegionInfo().getRegionName());
1616 HRegionServer regionServer
= cluster
.getRegionServer(serverWith
);
1617 byte[] parentRegionName
= regions
.get(0).getRegionInfo().getRegionName();
1618 cluster
.getServerWith(parentRegionName
);
1619 // Create daughters without adding to META table
1620 MasterProcedureEnv env
= cluster
.getMaster().getMasterProcedureExecutor().getEnvironment();
1621 SplitTableRegionProcedure splitR
= new SplitTableRegionProcedure(
1622 env
, regions
.get(0).getRegionInfo(), Bytes
.toBytes("r3"));
1623 splitR
.prepareSplitRegion(env
);
1624 splitR
.setRegionStateToSplitting(env
);
1625 splitR
.closeParentRegionForSplit(env
);
1626 splitR
.createDaughterRegions(env
);
1628 AssignmentManager am
= cluster
.getMaster().getAssignmentManager();
1629 for (RegionState state
: am
.getRegionStates().getRegionsInTransition()) {
1630 am
.regionOffline(state
.getRegion());
1633 Map
<HRegionInfo
, ServerName
> regionsMap
= new HashMap
<>();
1634 regionsMap
.put(regions
.get(0).getRegionInfo(), regionServer
.getServerName());
1635 am
.assign(regionsMap
);
1636 am
.waitForAssignment(regions
.get(0).getRegionInfo());
1637 HBaseFsck hbck
= doFsck(conf
, false);
1638 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1639 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
1640 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
});
1641 // holes are separate from overlap groups
1642 assertEquals(0, hbck
.getOverlapGroups(tableName
).size());
1646 doFsck(conf
, false, true, false, false, false, false, false, false, false, false, false,
1648 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1649 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
1650 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
});
1652 // check that hole fixed
1653 assertNoErrors(doFsck(conf
, false));
1654 assertEquals(5, countRows());
1660 cleanupTable(tableName
);
1665 * This creates and fixes a bad table with a hole in meta.
1667 @Test (timeout
=180000)
1668 public void testNotInMetaHole() throws Exception
{
1669 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1671 setupTable(tableName
);
1672 assertEquals(ROWKEYS
.length
, countRows());
1674 // Mess it up by leaving a hole in the meta data
1675 admin
.disableTable(tableName
);
1676 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"), Bytes
.toBytes("C"), false,
1677 true, false); // don't rm from fs
1678 admin
.enableTable(tableName
);
1680 HBaseFsck hbck
= doFsck(conf
, false);
1682 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1683 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
1684 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1685 // holes are separate from overlap groups
1686 assertEquals(0, hbck
.getOverlapGroups(tableName
).size());
1689 assertErrors(doFsck(conf
, true),
1690 new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1691 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META_OR_DEPLOYED
,
1692 HBaseFsck
.ErrorReporter
.ERROR_CODE
.HOLE_IN_REGION_CHAIN
});
1694 // check that hole fixed
1695 assertNoErrors(doFsck(conf
, false));
1696 assertEquals(ROWKEYS
.length
, countRows());
1698 cleanupTable(tableName
);
1703 * This creates and fixes a bad table with a region that is in meta but has
1704 * no deployment or data in hdfs
1706 @Test (timeout
=180000)
1707 public void testNotInHdfs() throws Exception
{
1708 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1710 setupTable(tableName
);
1711 assertEquals(ROWKEYS
.length
, countRows());
1713 // make sure data in regions, if in wal only there is no data loss
1714 admin
.flush(tableName
);
1716 // Mess it up by leaving a hole in the hdfs data
1717 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"), Bytes
.toBytes("C"), false,
1718 false, true); // don't rm meta
1720 HBaseFsck hbck
= doFsck(conf
, false);
1721 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {
1722 HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
});
1723 // holes are separate from overlap groups
1724 assertEquals(0, hbck
.getOverlapGroups(tableName
).size());
1729 // check that hole fixed
1730 assertNoErrors(doFsck(conf
,false));
1731 assertEquals(ROWKEYS
.length
- 2, countRows());
1733 cleanupTable(tableName
);
1738 * This creates a table and simulates the race situation where a concurrent compaction or split
1739 * has removed a colfam dir before the corruption checker got to it.
1741 // Disabled because fails sporadically. Is this test right? Timing-wise, there could be no
1742 // files in a column family on initial creation -- as suggested by Matteo.
1744 @Test(timeout
=180000)
1745 public void testQuarantineMissingFamdir() throws Exception
{
1746 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1747 // inject a fault in the hfcc created.
1748 final FileSystem fs
= FileSystem
.get(conf
);
1749 HBaseFsck hbck
= new HBaseFsck(conf
, hbfsckExecutorService
) {
1751 public HFileCorruptionChecker
createHFileCorruptionChecker(boolean sidelineCorruptHFiles
)
1752 throws IOException
{
1753 return new HFileCorruptionChecker(conf
, executor
, sidelineCorruptHFiles
) {
1754 AtomicBoolean attemptedFirstHFile
= new AtomicBoolean(false);
1756 protected void checkColFamDir(Path p
) throws IOException
{
1757 if (attemptedFirstHFile
.compareAndSet(false, true)) {
1758 assertTrue(fs
.delete(p
, true)); // make sure delete happened.
1760 super.checkColFamDir(p
);
1765 doQuarantineTest(tableName
, hbck
, 3, 0, 0, 0, 1);
1770 * This creates a table and simulates the race situation where a concurrent compaction or split
1771 * has removed a region dir before the corruption checker got to it.
1773 @Test(timeout
=180000)
1774 public void testQuarantineMissingRegionDir() throws Exception
{
1775 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
1776 // inject a fault in the hfcc created.
1777 final FileSystem fs
= FileSystem
.get(conf
);
1778 HBaseFsck hbck
= new HBaseFsck(conf
, hbfsckExecutorService
) {
1780 public HFileCorruptionChecker
createHFileCorruptionChecker(boolean sidelineCorruptHFiles
)
1781 throws IOException
{
1782 return new HFileCorruptionChecker(conf
, executor
, sidelineCorruptHFiles
) {
1783 AtomicBoolean attemptedFirstHFile
= new AtomicBoolean(false);
1785 protected void checkRegionDir(Path p
) throws IOException
{
1786 if (attemptedFirstHFile
.compareAndSet(false, true)) {
1787 assertTrue(fs
.delete(p
, true)); // make sure delete happened.
1789 super.checkRegionDir(p
);
1794 doQuarantineTest(tableName
, hbck
, 3, 0, 0, 0, 1);