/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.io.FileLink;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.log.HBaseMarkers;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
import org.apache.hadoop.hbase.replication.ReplicationUtils;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Multimap;
import org.apache.hbase.thirdparty.com.google.common.collect.Ordering;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
import org.apache.hbase.thirdparty.com.google.common.collect.TreeMultimap;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. It does not
 * work with hbase-2.x; it can read state but is not allowed to change state; i.e. effect 'repair'.
 * See hbck2 (HBASE-19121) for a hbck tool for hbase2.
 * <p>
 * Region consistency checks verify that hbase:meta, region deployment on region
 * servers and the state of data in HDFS (.regioninfo files) all are in
 * accordance.
 * <p>
 * Table integrity checks verify that all possible row keys resolve to exactly
 * one region of a table. This means there are no individual degenerate
 * or backwards regions; no holes between regions; and that there are no
 * overlapping regions.
 * <p>
 * The general repair strategy works in two phases:
 * <ol>
 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
 * <li> Repair Region Consistency with hbase:meta and assignments
 * </ol>
 * <p>
 * For table integrity repairs, the tables' region directories are scanned
 * for .regioninfo files. Each table's integrity is then verified. If there
 * are any orphan regions (regions with no .regioninfo files) or holes, new
 * regions are fabricated. Backwards regions are sidelined as well as empty
 * degenerate (endkey==startkey) regions. If there are any overlapping regions,
 * a new region is created and all data is merged into the new region.
 * <p>
 * Table integrity repairs deal solely with HDFS and could potentially be done
 * offline -- the hbase region servers or master do not need to be running.
 * This phase can eventually be used to completely reconstruct the hbase:meta table in
 * an offline fashion.
 * <p>
 * Region consistency requires three conditions -- 1) valid .regioninfo file
 * present in an HDFS region dir, 2) valid row with .regioninfo data in META,
 * and 3) a region is deployed only at the regionserver that was assigned to
 * with proper state in the master.
 * <p>
 * Region consistency repairs require hbase to be online so that hbck can
 * contact the HBase master and region servers. The hbck#connect() method must
 * first be called successfully. Much of the region consistency information
 * is transient and less risky to repair.
 * <p>
 * If hbck is run from the command line, there are a handful of arguments that
 * can be used to limit the kinds of repairs hbck will do. See the code in
 * {@link #printUsageAndExit()} for more details.
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
@InterfaceStability.Evolving
public class HBaseFsck extends Configured implements Closeable {
  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
  private static boolean rsSupportsOffline = true;
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  private static final int DEFAULT_MAX_MERGE = 5;
  private static final String TO_BE_LOADED = "to_be_loaded";

  /**
   * Here is where hbase-1.x used to default the lock for hbck1.
   * It puts in place a lock when it goes to write/make changes.
   */
  public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
  // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
  // In Hadoop 2.6 and later, the NameNode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which implies a timeout on these operations of up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
  /**********************
   * Internal resources
   **********************/
  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
  private ClusterMetrics status;
  private ClusterConnection connection;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
  protected ExecutorService executor;
  private long startMillis = EnvironmentEdgeManager.currentTime();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;
  private Path HBCK_LOCK_PATH;
  private FSDataOutputStream hbckOutFd;
  // This lock is to prevent cleanup of balancer resources twice between
  // ShutdownHook and the main code. We cleanup only if the connect() is
  // successful
  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
  // Unsupported options in HBase 2.0+
  private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
      "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
      "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
      "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
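
  // Illustrative usage only; printUsageAndExit() is the authoritative reference for options.
  // On an hbase-2 cluster the "fix" options above are rejected up front (repairs belong to the
  // separate HBCK2 tool, HBASE-19121), while read-only runs remain possible, e.g.:
  //   $ hbase hbck              # report-only consistency/integrity check
  //   $ hbase hbck -details     # also print the full per-table/per-region report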
  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
  private static boolean forceExclusive = false; // only this hbck can modify HBase
  private boolean fixAssignments = false; // fix assignment errors?
  private boolean fixMeta = false; // fix meta errors?
  private boolean checkHdfs = true; // load and check fs consistency?
  private boolean fixHdfsHoles = false; // fix fs holes?
  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
  private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
  private boolean fixSplitParents = false; // fix lingering split parents
  private boolean removeParents = false; // remove split parents
  private boolean fixReferenceFiles = false; // fix lingering reference store file
  private boolean fixHFileLinks = false; // fix lingering HFileLinks
  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
  private boolean cleanReplicationBarrier = false; // clean replication barriers of a table
  private boolean fixAny = false; // Set to true if any of the fix is required.

  // limit checking/fixes to listed tables, if empty attempt to check/fix all
  // hbase:meta are always checked
  private Set<TableName> tablesIncluded = new HashSet<>();
  private TableName cleanReplicationBarrierTable;
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
  // maximum number of overlapping regions to sideline
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
  private Path sidelineDir = null;

  private boolean rerun = false; // if we tried to fix something, rerun hbck
  private static boolean summary = false; // if we want to print less output
  private boolean checkMetaOnly = false;
  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if pre-check permission

  final private ErrorReporter errors;
  /**
   * This map contains the state of all hbck items. It maps from encoded region
   * name to HbckInfo structure. The information contained in HbckInfo is used
   * to detect and correct consistency (hdfs/meta/deployment) problems.
   */
  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<>();
  // Empty regioninfo qualifiers in hbase:meta
  private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();
  /**
   * This map from Tablename -> TableInfo contains the structures necessary to
   * detect table consistency problems (holes, dupes, overlaps). It is sorted
   * to prevent dupes.
   *
   * If tablesIncluded is empty, this map contains all tables.
   * Otherwise, it contains only meta tables and tables in tablesIncluded,
   * unless checkMetaOnly is specified, in which case, it contains only
   * the meta table.
   */
  private SortedMap<TableName, TableInfo> tablesInfo = new ConcurrentSkipListMap<>();
  /**
   * When initially looking at HDFS, we attempt to find any orphaned data.
   */
  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
  private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
  private Map<TableName, TableState> tableStates = new HashMap<>();
  private final RetryCounterFactory lockFileRetryCounterFactory;
  private final RetryCounterFactory createZNodeRetryCounterFactory;

  private Map<TableName, Set<String>> skippedRegions = new HashMap<>();

  private ZKWatcher zkw = null;
  private String hbckEphemeralNodePath = null;
  private boolean hbckZodeCreated = false;
  /**
   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */
  public HBaseFsck(Configuration conf) throws IOException, ClassNotFoundException {
    this(conf, createThreadPool(conf));
  }
  private static ExecutorService createThreadPool(Configuration conf) {
    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
    return new ScheduledThreadPoolExecutor(numThreads,
        Threads.newDaemonThreadFactory("hbasefsck"));
  }
  /**
   * @param conf
   *          Configuration object
   * @throws MasterNotRunningException
   *           if the master is not running
   * @throws ZooKeeperConnectionException
   *           if unable to connect to ZooKeeper
   */
  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
    lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
    createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
    zkw = createZooKeeperWatcher();
  }
  /**
   * @return A retry counter factory configured for retrying lock file creation.
   */
  public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
    return new RetryCounterFactory(
        conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
        conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
            DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
        conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
            DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
  }
  /**
   * @return A retry counter factory configured for retrying znode creation.
   */
  private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
    return new RetryCounterFactory(
        conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
        conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
            DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
        conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
            DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
  }
  /**
   * @return Return the tmp dir this tool writes to.
   */
  public static Path getTmpDir(Configuration conf) throws IOException {
    return new Path(FSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
  }
  private static class FileLockCallable implements Callable<FSDataOutputStream> {
    RetryCounter retryCounter;
    private final Configuration conf;
    private Path hbckLockPath = null;

    public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
      this.retryCounter = retryCounter;
      this.conf = conf;
    }
    /**
     * @return Will be <code>null</code> unless you call {@link #call()}
     */
    Path getHbckLockPath() {
      return this.hbckLockPath;
    }
    public FSDataOutputStream call() throws IOException {
      try {
        FileSystem fs = FSUtils.getCurrentFileSystem(this.conf);
        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, this.conf,
            HConstants.DATA_FILE_UMASK_KEY);
        Path tmpDir = getTmpDir(conf);
        this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
        final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
        out.writeBytes(InetAddress.getLocalHost().toString());
        // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
        out.writeBytes(" Written by an hbase-2.x Master to block an " +
            "attempt by an hbase-1.x HBCK tool making modification to state. " +
            "See 'HBCK must match HBase server version' in the hbase refguide.");
        return out;
      } catch (RemoteException e) {
        if (AlreadyBeingCreatedException.class.getName().equals(e.getClassName())) {
          return null;
        }
        throw e;
      }
    }
    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
        final Path hbckLockFilePath, final FsPermission defaultPerms)
        throws IOException {
      IOException exception = null;
      do {
        try {
          return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
        } catch (IOException ioe) {
          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
              + retryCounter.getMaxAttempts());
          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(), ioe);
          exception = ioe;
          try {
            retryCounter.sleepUntilNextRetry();
          } catch (InterruptedException ie) {
            throw (InterruptedIOException) new InterruptedIOException(
                "Can't create lock file " + hbckLockFilePath.getName()).initCause(ie);
          }
        }
      } while (retryCounter.shouldRetry());
      throw exception;
    }
  }
  /**
   * This method maintains a lock using a file. If the creation fails we return null.
   *
   * @return FSDataOutputStream object corresponding to the newly opened lock file
   * @throws IOException if IO failure occurs
   */
  public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
      RetryCounter retryCounter) throws IOException {
    FileLockCallable callable = new FileLockCallable(conf, retryCounter);
    ExecutorService executor = Executors.newFixedThreadPool(1);
    FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
    executor.execute(futureTask);
    final int timeoutInSeconds = conf.getInt(
        "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
    FSDataOutputStream stream = null;
    try {
      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
    } catch (ExecutionException ee) {
      LOG.warn("Encountered exception when opening lock file", ee);
    } catch (InterruptedException ie) {
      LOG.warn("Interrupted when opening lock file", ie);
      Thread.currentThread().interrupt();
    } catch (TimeoutException exception) {
      // took too long to obtain lock
      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
      futureTask.cancel(true);
    } finally {
      executor.shutdownNow();
    }
    return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
  }
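
  // Typical caller pattern (connect() below follows it): acquire the exclusive lock before any
  // repair, and treat a null stream as "another hbck instance is already running":
  //   Pair<Path, FSDataOutputStream> pair =
  //       checkAndMarkRunningHbck(getConf(), lockFileRetryCounterFactory.create());
  //   if (pair.getSecond() == null) { /* lock not acquired; abort */ }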
  private void unlockHbck() {
    if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
      do {
        try {
          IOUtils.closeQuietly(hbckOutFd);
          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
          LOG.info("Finishing hbck");
          return;
        } catch (IOException ioe) {
          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
              + (retryCounter.getAttemptTimes() + 1) + " of "
              + retryCounter.getMaxAttempts());
          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
          try {
            retryCounter.sleepUntilNextRetry();
          } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while deleting lock file" +
                HBCK_LOCK_PATH);
            return;
          }
        }
      } while (retryCounter.shouldRetry());
    }
  }
  /**
   * To repair region consistency, one must call connect() in order to repair
   * online state.
   */
  public void connect() throws IOException {
    Pair<Path, FSDataOutputStream> pair =
        checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
    HBCK_LOCK_PATH = pair.getFirst();
    this.hbckOutFd = pair.getSecond();
    if (hbckOutFd == null) {
      LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
          "[If you are sure no other instance is running, delete the lock file " +
          HBCK_LOCK_PATH + " and rerun the tool]");
      throw new IOException("Duplicate hbck - Abort");
    }

    // Make sure to cleanup the lock
    hbckLockCleanup.set(true);

    // Add a shutdown hook to this thread, in case user tries to
    // kill the hbck with a ctrl-c, we want to cleanup the lock so that
    // it is available for further calls
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        IOUtils.closeQuietly(HBaseFsck.this);
        unlockHbck();
      }
    });

    LOG.info("Launching hbck");

    connection = (ClusterConnection) ConnectionFactory.createConnection(getConf());
    admin = connection.getAdmin();
    meta = connection.getTable(TableName.META_TABLE_NAME);
    status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS,
        Option.DEAD_SERVERS, Option.MASTER, Option.BACKUP_MASTERS,
        Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
  }
  /**
   * Get deployed regions according to the region servers.
   */
  private void loadDeployedRegions() throws IOException, InterruptedException {
    // From the master, get a list of all known live region servers
    Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
    errors.print("Number of live region servers: " + regionServers.size());
    for (ServerName rsinfo : regionServers) {
      errors.print("  " + rsinfo.getServerName());
    }

    // From the master, get a list of all dead region servers
    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
    errors.print("Number of dead region servers: " + deadRegionServers.size());
    for (ServerName name : deadRegionServers) {
      errors.print("  " + name);
    }

    // Print the current master name and state
    errors.print("Master: " + status.getMasterName());

    // Print the list of all backup masters
    Collection<ServerName> backupMasters = status.getBackupMasterNames();
    errors.print("Number of backup masters: " + backupMasters.size());
    for (ServerName name : backupMasters) {
      errors.print("  " + name);
    }

    errors.print("Average load: " + status.getAverageLoad());
    errors.print("Number of requests: " + status.getRequestCount());
    errors.print("Number of regions: " + status.getRegionCount());

    List<RegionState> rits = status.getRegionStatesInTransition();
    errors.print("Number of regions in transition: " + rits.size());
    for (RegionState state : rits) {
      errors.print("  " + state.toDescriptiveString());
    }

    // Determine what's deployed
    processRegionServers(regionServers);
  }
  /**
   * Clear the current state of hbck.
   */
  private void clearState() {
    // Make sure regionInfo is empty before starting
    regionInfoMap.clear();
    emptyRegionInfoQualifiers.clear();
    orphanHdfsDirs.clear();
    skippedRegions.clear();
  }
  /**
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
   * the table integrity rules. HBase doesn't need to be online for this
   * operation to work.
   */
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
    // Initial pass to fix orphans.
    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos HDFS");
      // if nothing is happening this should always complete in two iterations.
      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
      int curIter = 0;
      do {
        clearState(); // clears hbck state and resets fixes to 0.
        // repair what's on HDFS
        restoreHdfsIntegrity();
        curIter++; // limit the number of iterations.
      } while (fixes > 0 && curIter <= maxIterations);

      // Repairs should be done in the first iteration and verification in the second.
      // If there are more than 2 passes, something funny has happened.
      if (curIter > 2) {
        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Tables integrity may not be fully repaired!");
        } else {
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
        }
      }
    }
  }
  /**
   * This repair method requires the cluster to be online since it contacts
   * region servers and the masters. It makes each region's state in HDFS, in
   * hbase:meta, and deployments consistent.
   *
   * @return If > 0 , number of errors detected, if < 0 there was an unrecoverable
   *     error. If 0, we have a clean hbase.
   */
  public int onlineConsistencyRepair() throws IOException, KeeperException,
      InterruptedException {

    // get regions according to what is online on each RegionServer
    loadDeployedRegions();
    // check whether hbase:meta is deployed and online

    // Check if hbase:meta is found only once and in the right place
    if (!checkMetaRegion()) {
      String errorMsg = "hbase:meta table is not consistent. ";
      if (shouldFixAssignments()) {
        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
      } else {
        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
      }
      errors.reportError(errorMsg + " Exiting...");
      return -2;
    }
    // Not going with further consistency check for tables when hbase:meta itself is not consistent.
    LOG.info("Loading regionsinfo from the hbase:meta table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    // Empty cells in hbase:meta?
    reportEmptyMetaCells();

    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    // get a list of all tables that have not changed recently.
    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    // Get disabled tables states

    // load regiondirs and regioninfos from HDFS
    if (shouldCheckHdfs()) {
      LOG.info("Loading region directories from HDFS");
      loadHdfsRegionDirs();
      LOG.info("Loading region information from HDFS");
      loadHdfsRegionInfos();
    }

    // fix the orphan tables
    fixOrphanTables();

    LOG.info("Checking and fixing region consistency");
    // Check and fix consistency
    checkAndFixConsistency();

    // Check integrity (does not fix)
    return errors.getErrorList().size();
  }
  /**
   * This method maintains an ephemeral znode. If the creation fails we return false or throw
   * an exception.
   *
   * @return true if creating znode succeeds; false otherwise
   * @throws IOException if IO failure occurs
   */
  private boolean setMasterInMaintenanceMode() throws IOException {
    RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
    hbckEphemeralNodePath = ZNodePaths.joinZNode(
        zkw.getZNodePaths().masterMaintZNode,
        "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
    do {
      try {
        hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
        if (hbckZodeCreated) {
          break;
        }
      } catch (KeeperException e) {
        if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
          throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
        }
        // fall through and retry
      }

      LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try=" +
          (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());

      try {
        retryCounter.sleepUntilNextRetry();
      } catch (InterruptedException ie) {
        throw (InterruptedIOException) new InterruptedIOException(
            "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
      }
    } while (retryCounter.shouldRetry());
    return hbckZodeCreated;
  }
  private void cleanupHbckZnode() {
    try {
      if (zkw != null && hbckZodeCreated) {
        ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
        hbckZodeCreated = false;
      }
    } catch (KeeperException e) {
      if (!e.code().equals(KeeperException.Code.NONODE)) {
        LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
      }
    }
  }
  /**
   * Contacts the master and prints out cluster-wide information
   * @return 0 on success, non-zero on failure
   */
  public int onlineHbck()
      throws IOException, KeeperException, InterruptedException, ReplicationException {
    // print hbase server version
    errors.print("Version: " + status.getHBaseVersion());

    // Do offline check and repair first
    offlineHdfsIntegrityRepair();
    offlineReferenceFileRepair();
    offlineHLinkFileRepair();
    // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
    // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
    // is better to set Master into maintenance mode during online hbck.
    if (!setMasterInMaintenanceMode()) {
      LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
          + "error. Please run HBCK multiple times to reduce the chance of transient error.");
    }

    onlineConsistencyRepair();

    if (checkRegionBoundaries) {
      checkRegionBoundaries();
    }

    checkAndFixReplication();

    cleanReplicationBarrier();

    // Remove the hbck znode
    cleanupHbckZnode();

    // Remove the hbck lock
    unlockHbck();

    // Print table summary
    printTableSummary(tablesInfo);
    return errors.summarize();
  }
  public static byte[] keyOnly(byte[] b) {
    // A KeyValue key starts with a 2-byte row length followed by the row bytes;
    // return just the row portion.
    int rowlength = Bytes.toShort(b, 0);
    byte[] result = new byte[rowlength];
    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
    return result;
  }
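
  // For example, with the key layout described above, the bytes {0x00, 0x03, 'r', 'o', 'w', ...}
  // carry a row length of 3, so keyOnly(...) returns {'r', 'o', 'w'}.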
  @Override
  public void close() throws IOException {
    try {
      cleanupHbckZnode();
      unlockHbck();
    } catch (Exception io) {
      LOG.warn(io.toString(), io);
    } finally {
      IOUtils.closeQuietly(admin);
      IOUtils.closeQuietly(meta);
      IOUtils.closeQuietly(connection);
    }
  }
  private static class RegionBoundariesInformation {
    public byte[] regionName;
    public byte[] metaFirstKey;
    public byte[] metaLastKey;
    public byte[] storesFirstKey;
    public byte[] storesLastKey;
    @Override
    public String toString() {
      return "regionName=" + Bytes.toStringBinary(regionName) +
          "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
          "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
          "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
          "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
    }
  }
  public void checkRegionBoundaries() {
    try {
      ByteArrayComparator comparator = new ByteArrayComparator();
      List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
      final RegionBoundariesInformation currentRegionBoundariesInformation =
          new RegionBoundariesInformation();
      Path hbaseRoot = FSUtils.getRootDir(getConf());
      for (RegionInfo regionInfo : regions) {
        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
        // For each region, get the start and stop key from the META and compare them to the
        // same information from the Stores.
        Path path = new Path(tableDir, regionInfo.getEncodedName());
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] files = fs.listStatus(path);
        // For all the column families in this region...
        byte[] storeFirstKey = null;
        byte[] storeLastKey = null;
        for (FileStatus file : files) {
          String fileName = file.getPath().toString();
          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
            FileStatus[] storeFiles = fs.listStatus(file.getPath());
            // For all the stores in this column family.
            for (FileStatus storeFile : storeFiles) {
              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(),
                  CacheConfig.DISABLED, true, getConf());
              if ((reader.getFirstKey() != null)
                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
                      ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) {
                storeFirstKey = ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey();
              }
              if ((reader.getLastKey() != null)
                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
                      ((KeyValue.KeyOnlyKeyValue) reader.getLastKey().get()).getKey())) < 0)) {
                storeLastKey = ((KeyValue.KeyOnlyKeyValue) reader.getLastKey().get()).getKey();
              }
            }
          }
        }
        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
          currentRegionBoundariesInformation.metaFirstKey = null;
        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
          currentRegionBoundariesInformation.metaLastKey = null;

        // For a region to be correct, we need the META start key to be smaller or equal to the
        // smallest start key from all the stores, and the start key from the next META entry to
        // be bigger than the last key from all the current stores. First region start key is null;
        // Last region end key is null; some regions can be empty and not have any store.

        boolean valid = true;
        // Checking start key.
        if ((currentRegionBoundariesInformation.storesFirstKey != null)
            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
                  currentRegionBoundariesInformation.metaFirstKey) >= 0;
        }
        // Checking stop key.
        if ((currentRegionBoundariesInformation.storesLastKey != null)
            && (currentRegionBoundariesInformation.metaLastKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
                  currentRegionBoundariesInformation.metaLastKey) < 0;
        }
        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
              tablesInfo.get(regionInfo.getTable()));
          LOG.warn("Region's boundaries not aligned between stores and META for:");
          LOG.warn(Objects.toString(currentRegionBoundariesInformation));
        }
      }
    } catch (IOException e) {
      LOG.error(e.toString(), e);
    }
  }
  /**
   * Iterates through the list of all orphan/invalid regiondirs.
   */
  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
    for (HbckInfo hi : orphanHdfsDirs) {
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
      adoptHdfsOrphan(hi);
    }
  }
  /**
   * Orphaned regions are regions without a .regioninfo file in them. We "adopt"
   * these orphans by creating a new region, and moving the column families,
   * recovered edits, WALs, into the new region dir. We determine the region
   * startkey and endkeys by looking at all of the hfiles inside the column
   * families to identify the min and max keys. The resulting region will
   * likely violate table integrity but will be dealt with by merging
   * overlapping regions.
   */
  @SuppressWarnings("deprecation")
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
          p + ". This dir could probably be deleted.");
      return;
    }

    TableName tableName = hi.getTableName();
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    TableDescriptor template = tableInfo.getHTD();

    // find min and max key values
    Pair<byte[], byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
      // TODO Figure out what the special dirs are
      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;

      FileStatus[] hfiles = fs.listStatus(cf.getPath());
      for (FileStatus hfile : hfiles) {
        byte[] start, end;
        HFile.Reader hf = null;
        try {
          hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
          Optional<Cell> startKv = hf.getFirstKey();
          start = CellUtil.cloneRow(startKv.get());
          Optional<Cell> endKv = hf.getLastKey();
          end = CellUtil.cloneRow(endKv.get());
        } catch (IOException ioe) {
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
          continue;
        } catch (NullPointerException ioe) {
          LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
          continue;
        }

        // expand the range to include the range of all hfiles
        if (orphanRegionRange == null) {
          orphanRegionRange = new Pair<>(start, end);
        } else {
          // expand range only if the hfile is wider.
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
            orphanRegionRange.setFirst(start);
          }
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
            orphanRegionRange.setSecond(end);
          }
        }
      }
    }
    if (orphanRegionRange == null) {
      LOG.warn("No data in dir " + p + ", sidelining data");
      sidelineRegionDir(fs, hi);
      return;
    }
    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
        Bytes.toString(orphanRegionRange.getSecond()) + ")");

    // create new region on hdfs. move data into place.
    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
        .setStartKey(orphanRegionRange.getFirst())
        .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1]))
        .build();
    LOG.info("Creating new region : " + regionInfo);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
    Path target = region.getRegionFileSystem().getRegionDir();

    // rename all the data to new region
    mergeRegionDirs(target, hi);
  }
  /**
   * This method determines if there are table integrity errors in HDFS. If
   * there are errors and the appropriate "fix" options are enabled, the method
   * will first correct orphan regions making them into legit regiondirs, and
   * then reload to merge potentially overlapping regions.
   *
   * @return number of table integrity errors found
   */
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
    // Determine what's on HDFS
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs(); // populating regioninfo table.

    int errs = errors.getErrorList().size();
    // First time just get suggestions.
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() == errs) {
      LOG.info("No integrity errors. We are done with this phase. Glorious.");
      return 0;
    }

    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
      adoptHdfsOrphans(orphanHdfsDirs);
      // TODO optimize by incrementally adding instead of reloading.
    }

    // Make sure there are no holes now.
    if (shouldFixHdfsHoles()) {
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
    }

    // Now we fix overlaps
    if (shouldFixHdfsOverlaps()) {
      // second pass we fix overlaps.
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
    }

    return errors.getErrorList().size();
  }
  /**
   * Scan all the store file names to find any lingering reference files,
   * which refer to some non-existing files. If "fix" option is enabled,
   * any lingering reference file will be sidelined if found.
   * <p>
   * Lingering reference file prevents a region from opening. It has to
   * be fixed before a cluster can start properly.
   */
  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    LOG.info("Computing mapping of all store files");
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
        new FSUtils.ReferenceFileFilter(fs), executor, errors);
    LOG.info("Validating mapping using HDFS state");
    for (Path path : allFiles.values()) {
      Path referredToFile = StoreFileInfo.getReferredToFile(path);
      if (fs.exists(referredToFile)) continue; // good, expected

      // Found a lingering reference file
      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
          "Found lingering reference file " + path);
      if (!shouldFixReferenceFiles()) continue;

      // Now, trying to fix it since requested
      boolean success = false;
      String pathStr = path.toString();

      // A reference file path should be like
      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
      // Up 5 directories to get the root folder.
      // So the file will be sidelined to a similar folder structure.
      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 5; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }

      Path rootDir = getSidelineDir();
      Path dst = new Path(rootDir, pathStr.substring(index + 1));
      fs.mkdirs(dst.getParent());
      LOG.info("Trying to sideline reference file "
          + path + " to " + dst);

      success = fs.rename(path, dst);

      if (!success) {
        LOG.error("Failed to sideline reference file " + path);
      }
    }
  }
  /**
   * Scan all the store file names to find any lingering HFileLink files,
   * which refer to some non-existing files. If "fix" option is enabled,
   * any lingering HFileLink file will be sidelined if found.
   */
  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    LOG.info("Computing mapping of all link files");
    Map<String, Path> allFiles = FSUtils
        .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);

    LOG.info("Validating mapping using HDFS state");
    for (Path path : allFiles.values()) {
      // building HFileLink object to gather locations
      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
      if (actualLink.exists(fs)) continue; // good, expected

      // Found a lingering HFileLink
      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
      if (!shouldFixHFileLinks()) continue;

      // Now, trying to fix it since requested
      // An HFileLink path should be like
      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
      boolean success = sidelineFile(fs, hbaseRoot, path);

      if (!success) {
        LOG.error("Failed to sideline HFileLink file " + path);
      }

      // An HFileLink backreference path should be like
      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
      // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
      Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
              .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
                  HFileLink.getReferencedRegionName(path.getName().toString()),
                  path.getParent().getName()),
          HFileLink.getReferencedHFileName(path.getName().toString()));
      success = sidelineFile(fs, hbaseRoot, backRefPath);

      if (!success) {
        LOG.error("Failed to sideline HFileLink backreference file " + path);
      }
    }
  }
  private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
    URI uri = hbaseRoot.toUri().relativize(path.toUri());
    if (uri.isAbsolute()) return false;
    String relativePath = uri.getPath();
    Path rootDir = getSidelineDir();
    Path dst = new Path(rootDir, relativePath);
    boolean pathCreated = fs.mkdirs(dst.getParent());
    if (!pathCreated) {
      LOG.error("Failed to create path: " + dst.getParent());
      return false;
    }
    LOG.info("Trying to sideline file " + path + " to " + dst);
    return fs.rename(path, dst);
  }
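
  // Worked example with hypothetical paths: for hbaseRoot=hdfs://ns/hbase and a lingering file at
  // hdfs://ns/hbase/data/default/t1/abc123/f1/link-file, the relativized path is
  // data/default/t1/abc123/f1/link-file and the file is renamed under getSidelineDir() with the
  // same relative layout.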
  /**
   * TODO -- need to add tests for this.
   */
  private void reportEmptyMetaCells() {
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
        emptyRegionInfoQualifiers.size());
    for (Result r : emptyRegionInfoQualifiers) {
      errors.print("  " + r);
    }
  }
  /**
   * TODO -- need to add tests for this.
   */
  private void reportTablesInFlux() {
    AtomicInteger numSkipped = new AtomicInteger(0);
    TableDescriptor[] allTables = getTables(numSkipped);
    errors.print("Number of Tables: " + allTables.length);
    if (numSkipped.get() > 0) {
      errors.detail("Number of Tables in flux: " + numSkipped.get());
    }
    for (TableDescriptor td : allTables) {
      errors.detail("  Table: " + td.getTableName() + "\t" +
          (td.isReadOnly() ? "ro" : "rw") + "\t" +
          (td.isMetaRegion() ? "META" : "   ") + "\t" +
          " families: " + td.getColumnFamilyCount());
    }
  }
  public ErrorReporter getErrors() {
    return errors;
  }
  /**
   * Read the .regioninfo file from the file system. If there is no
   * .regioninfo, add it to the orphan hdfs region list.
   */
  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    if (regionDir == null) {
      if (hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
        // Log warning only for default/ primary replica with no region dir
        LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
      }
      return;
    }

    if (hbi.hdfsEntry.hri != null) {
      // already loaded data
      return;
    }

    FileSystem fs = FileSystem.get(getConf());
    RegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
    LOG.debug("RegionInfo read: " + hri.toString());
    hbi.hdfsEntry.hri = hri;
  }
  /**
   * Exception thrown when an integrity repair operation fails in an
   * unrecoverable way.
   */
  public static class RegionRepairException extends IOException {
    private static final long serialVersionUID = 1L;
    final IOException ioe;
    public RegionRepairException(String s, IOException ioe) {
      super(s);
      this.ioe = ioe;
    }
  }
  /**
   * Populate hbi's from regionInfos loaded from file system.
   */
  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
      throws IOException, InterruptedException {
    tablesInfo.clear(); // regenerating the data
    // generate region split structure
    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    // Parallelized read of .regioninfo files.
    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckInfos.size());
    List<Future<Void>> hbiFutures;

    for (HbckInfo hbi : hbckInfos) {
      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
      hbis.add(work);
    }

    // Submit and wait for completion
    hbiFutures = executor.invokeAll(hbis);

    for (int i = 0; i < hbiFutures.size(); i++) {
      WorkItemHdfsRegionInfo work = hbis.get(i);
      Future<Void> f = hbiFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Failed to read .regioninfo file for region " +
            work.hbi.getRegionNameAsString(), e.getCause());
      }
    }

    Path hbaseRoot = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseRoot.getFileSystem(getConf());
    // serialized table info gathering.
    for (HbckInfo hbi : hbckInfos) {

      if (hbi.getHdfsHRI() == null) {
        continue;
      }

      // get table name from hdfs, populate various HBaseFsck tables.
      TableName tableName = hbi.getTableName();
      if (tableName == null) {
        // There was an entry in hbase:meta not in the HDFS?
        LOG.warn("tableName was null for: " + hbi);
        continue;
      }

      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        // only executed once per table.
        modTInfo = new TableInfo(tableName);
        tablesInfo.put(tableName, modTInfo);
        try {
          TableDescriptor htd =
              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
          modTInfo.htds.add(htd);
        } catch (IOException ioe) {
          if (!orphanTableDirs.containsKey(tableName)) {
            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
            // should only report once for each table
            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
            Set<String> columns = new HashSet<>();
            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
          }
        }
      }
      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }
    }

    loadTableInfosForTablesWithNoRegion();
    return tablesInfo;
  }
  /**
   * To get the column family list according to the column family dirs
   * @return a set of column families
   * @throws IOException
   */
  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
    for (FileStatus subdir : subDirs) {
      String columnfamily = subdir.getPath().getName();
      columns.add(columnfamily);
    }
    return columns;
  }
  /**
   * To fabricate a .tableinfo file with following contents<br>
   * 1. the correct tablename <br>
   * 2. the correct colfamily list<br>
   * 3. the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
   * @throws IOException
   */
  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
    for (String columnfamimly : columns) {
      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnfamimly));
    }
    fstd.createTableDescriptor(builder.build(), true);
    return true;
  }
  /**
   * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br>
   * @throws IOException
   */
  public void fixEmptyMetaCells() throws IOException {
    if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
      LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
      for (Result region : emptyRegionInfoQualifiers) {
        deleteMetaRegion(region.getRow());
        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
      }
      emptyRegionInfoQualifiers.clear();
    }
  }
  /**
   * To fix orphan table by creating a .tableinfo file under tableDir <br>
   * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
   * 2. else create a default .tableinfo file with following items<br>
   * 2.1 the correct tablename <br>
   * 2.2 the correct colfamily list<br>
   * 2.3 the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
   * @throws IOException
   */
  public void fixOrphanTables() throws IOException {
    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {

      List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
      tmpList.addAll(orphanTableDirs.keySet());
      TableDescriptor[] htds = getTableDescriptors(tmpList);
      Iterator<Entry<TableName, Set<String>>> iter =
          orphanTableDirs.entrySet().iterator();
      int j = 0;
      int numFailedCase = 0;
      FSTableDescriptors fstd = new FSTableDescriptors(getConf());
      while (iter.hasNext()) {
        Entry<TableName, Set<String>> entry =
            iter.next();
        TableName tableName = entry.getKey();
        LOG.info("Trying to fix orphan table error: " + tableName);
        if (j < htds.length) {
          if (tableName.equals(htds[j].getTableName())) {
            TableDescriptor htd = htds[j];
            LOG.info("fixing orphan table: " + tableName + " from cache");
            fstd.createTableDescriptor(htd, true);
            j++;
            iter.remove();
          }
        } else {
          if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
            LOG.warn("Strongly recommend to modify the TableDescriptor if necessary for: "
                + tableName);
            iter.remove();
          } else {
            LOG.error("Unable to create default .tableinfo for " + tableName
                + " while missing column family information");
            numFailedCase++;
          }
        }
      }

      if (orphanTableDirs.isEmpty()) {
        // all orphanTableDirs are luckily recovered
        // re-run doFsck after recovering the .tableinfo file
        LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
      } else if (numFailedCase > 0) {
        LOG.error("Failed to fix " + numFailedCase
            + " OrphanTables with default .tableinfo files");
      }
    }
    orphanTableDirs.clear();
  }
  /**
   * This borrows code from MasterFileSystem.bootstrap(). Explicitly creates its own WAL, so be
   * sure to close it as well as the region when you're finished.
   * @param walFactoryID A unique identifier for WAL factory. Filesystem implementations will use
   *          this ID to make a directory inside WAL directory path.
   * @return an open hbase:meta HRegion
   */
  private HRegion createNewMeta(String walFactoryID) throws IOException {
    Path rootdir = FSUtils.getRootDir(getConf());
    Configuration c = getConf();
    RegionInfo metaHRI = RegionInfoBuilder.FIRST_META_REGIONINFO;
    TableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
    // The WAL subsystem will use the default rootDir rather than the passed in rootDir
    // unless I pass along via the conf.
    Configuration confForWAL = new Configuration(c);
    confForWAL.set(HConstants.HBASE_DIR, rootdir.toString());
    WAL wal = new WALFactory(confForWAL, walFactoryID).getWAL(metaHRI);
    HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor, wal);
    MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
    return meta;
  }
  /**
   * Generate set of puts to add to new meta.  This expects the tables to be
   * clean with no overlaps or holes.  If there are any problems it returns null.
   *
   * @return An array list of puts to do in bulk, null if tables have problems
   */
  private ArrayList<Put> generatePuts(SortedMap<TableName, TableInfo> tablesInfo)
      throws IOException {
    ArrayList<Put> puts = new ArrayList<>();
    boolean hasProblems = false;
    for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
      TableName name = e.getKey();

      // skip "hbase:meta"
      if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
        continue;
      }

      TableInfo ti = e.getValue();
      puts.add(MetaTableAccessor.makePutFromTableState(
          new TableState(ti.tableName, TableState.State.ENABLED),
          EnvironmentEdgeManager.currentTime()));
      for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
          .entrySet()) {
        Collection<HbckInfo> his = spl.getValue();
        int sz = his.size();
        if (sz != 1) {
          LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
              + " had " + sz + " regions instead of exactly 1." );
          hasProblems = true;
        }

        // add the row directly to meta.
        HbckInfo hi = his.iterator().next();
        RegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
        Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
        puts.add(p);
      }
    }
    return hasProblems ? null : puts;
  }
  /**
   * Suggest fixes for each table
   */
  private void suggestFixes(
      SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      tInfo.checkRegionChain(handler);
    }
  }
  /**
   * Rebuilds meta from information in hdfs/fs.  Depends on configuration settings passed into
   * hbck constructor to point to a particular fs/dir.  Assumes HBase is OFFLINE.
   *
   * @param fix flag that determines if method should attempt to fix holes
   * @return true if successful, false if attempt failed.
   */
  public boolean rebuildMeta(boolean fix) throws IOException,
      InterruptedException {

    // TODO check to make sure hbase is offline. (or at least the table
    // currently being worked on is off line)

    // Determine what's on HDFS
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs(); // populating regioninfo table.

    int errs = errors.getErrorList().size();
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() != errs) {
      // While in error state, iterate until no more fixes possible
      while (true) {
        suggestFixes(tablesInfo);

        loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
        checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());

        int errCount = errors.getErrorList().size();
        if (errCount > 0) {
          return false; // failed to fix problems.
        }
        break; // no fixes and no problems? drop out and fix stuff!
      }
    }

    // we can rebuild, move old meta out of the way and start
    LOG.info("HDFS regioninfo's seems good.  Sidelining old hbase:meta");
    Path backupDir = sidelineOldMeta();

    LOG.info("Creating new hbase:meta");
    String walFactoryId = "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8);
    HRegion meta = createNewMeta(walFactoryId);

    List<Put> puts = generatePuts(tablesInfo);
    if (puts == null) {
      LOG.error(HBaseMarkers.FATAL, "Problem encountered when creating new hbase:meta "
          + "entries. You may need to restore the previously sidelined hbase:meta");
      return false;
    }
    meta.batchMutate(puts.toArray(new Put[puts.size()]), HConstants.NO_NONCE, HConstants.NO_NONCE);

    if (meta.getWAL() != null) {
      meta.getWAL().close();
    }
    // clean up the temporary hbck meta recovery WAL directory
    removeHBCKMetaRecoveryWALDir(walFactoryId);
    LOG.info("Success! hbase:meta table rebuilt.");
    LOG.info("Old hbase:meta is moved into " + backupDir);
    return true;
  }
  /**
   * Removes the empty Meta recovery WAL directory.
   * @param walFactoryId A unique identifier for WAL factory which was used by Filesystem to make a
   *          Meta recovery WAL directory inside WAL directory path.
   */
  private void removeHBCKMetaRecoveryWALDir(String walFactoryId) throws IOException {
    Path walLogDir = new Path(new Path(CommonFSUtils.getWALRootDir(getConf()),
        HConstants.HREGION_LOGDIR_NAME), walFactoryId);
    FileSystem fs = CommonFSUtils.getWALFileSystem(getConf());
    FileStatus[] walFiles = FSUtils.listStatus(fs, walLogDir, null);
    if (walFiles == null || walFiles.length == 0) {
      LOG.info("HBCK meta recovery WAL directory is empty, removing it now.");
      if (!FSUtils.deleteDirectory(fs, walLogDir)) {
        LOG.warn("Couldn't clear the HBCK Meta recovery WAL directory " + walLogDir);
      }
    }
  }
  /**
   * Log an appropriate message about whether or not overlapping merges are computed in parallel.
   */
  private void logParallelMerge() {
    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
      LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
          " false to run serially.");
    } else {
      LOG.info("Handling overlap merges serially. set hbasefsck.overlap.merge.parallel to" +
          " true to run in parallel.");
    }
  }
  private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
      boolean fixOverlaps) throws IOException {
    LOG.info("Checking HBase region split map from HDFS data...");
    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler;
      if (fixHoles || fixOverlaps) {
        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
            fixHoles, fixOverlaps);
      } else {
        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      }
      if (!tInfo.checkRegionChain(handler)) {
        // should dump info as well.
        errors.report("Found inconsistency in table " + tInfo.getName());
      }
    }
    return tablesInfo;
  }
  private Path getSidelineDir() throws IOException {
    if (sidelineDir == null) {
      Path hbaseDir = FSUtils.getRootDir(getConf());
      Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
      sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
  /**
   * Sideline a region dir (instead of deleting it)
   */
  Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
    return sidelineRegionDir(fs, null, hi);
  }
  /**
   * Sideline a region dir (instead of deleting it)
   *
   * @param parentDir if specified, the region will be sidelined to folder like
   *     {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
   *     similar regions sidelined, for example, those regions should be bulk loaded back later
   *     on. If NULL, it is ignored.
   */
  Path sidelineRegionDir(FileSystem fs,
      String parentDir, HbckInfo hi) throws IOException {
    TableName tableName = hi.getTableName();
    Path regionDir = hi.getHdfsRegionDir();

    if (!fs.exists(regionDir)) {
      LOG.warn("No previous " + regionDir + " exists. Continuing.");
      return null;
    }

    Path rootDir = getSidelineDir();
    if (parentDir != null) {
      rootDir = new Path(rootDir, parentDir);
    }
    Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
    Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
    fs.mkdirs(sidelineRegionDir);
    boolean success = false;
    FileStatus[] cfs = fs.listStatus(regionDir);
    if (cfs == null) {
      LOG.info("Region dir is empty: " + regionDir);
    } else {
      for (FileStatus cf : cfs) {
        Path src = cf.getPath();
        Path dst = new Path(sidelineRegionDir, src.getName());
        if (fs.isFile(src)) {
          success = fs.rename(src, dst);
          if (!success) {
            String msg = "Unable to rename file " + src + " to " + dst;
            throw new IOException(msg);
          }
          continue;
        }

        LOG.info("Sidelining files from " + src + " into containing region " + dst);
        // FileSystem.rename is inconsistent with directories -- if the
        // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
        // it moves the src into the dst dir resulting in (foo/a/b). If
        // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
        FileStatus[] hfiles = fs.listStatus(src);
        if (hfiles != null && hfiles.length > 0) {
          for (FileStatus hfile : hfiles) {
            success = fs.rename(hfile.getPath(), dst);
            if (!success) {
              String msg = "Unable to rename file " + src + " to " + dst;
              throw new IOException(msg);
            }
          }
        }
        LOG.debug("Sideline directory contents:");
        debugLsr(sidelineRegionDir);
      }
    }

    LOG.info("Removing old region dir: " + regionDir);
    success = fs.delete(regionDir, true);
    if (!success) {
      String msg = "Unable to delete dir " + regionDir;
      throw new IOException(msg);
    }
    return sidelineRegionDir;
  }
  /**
   * Side line an entire table.
   */
  void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
      Path backupHbaseDir) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
    if (fs.exists(tableDir)) {
      Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName);
      fs.mkdirs(backupTableDir.getParent());
      boolean success = fs.rename(tableDir, backupTableDir);
      if (!success) {
        throw new IOException("Failed to move " + tableName + " from "
            + tableDir + " to " + backupTableDir);
      }
    } else {
      LOG.info("No previous " + tableName + " exists. Continuing.");
    }
  }
  /**
   * @return Path to backup of original directory
   */
  Path sidelineOldMeta() throws IOException {
    // put current hbase:meta aside.
    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    Path backupDir = getSidelineDir();
    fs.mkdirs(backupDir);

    try {
      sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
    } catch (IOException e) {
      LOG.error(HBaseMarkers.FATAL, "... failed to sideline meta. Currently in "
          + "inconsistent state. To restore try to rename hbase:meta in " +
          backupDir.getName() + " to " + hbaseDir.getName() + ".", e);
      throw e; // throw original exception
    }
    return backupDir;
  }
  /**
   * Load the list of disabled tables in ZK into local set.
   * @throws ZooKeeperConnectionException
   * @throws IOException
   */
  private void loadTableStates()
      throws IOException {
    tableStates = MetaTableAccessor.getTableStates(connection);
    // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it
    // has no entry in the table states. HBCK doesn't work right w/ hbase2 but just do this in
    this.tableStates.put(TableName.META_TABLE_NAME,
        new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
  }
  /**
   * Check if the specified region's table is disabled.
   * @param tableName table to check status of
   */
  private boolean isTableDisabled(TableName tableName) {
    return tableStates.containsKey(tableName)
        && tableStates.get(tableName)
            .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
  }
  /**
   * Scan HDFS for all regions, recording their information into
   * regioninfoMap.
   */
  public void loadHdfsRegionDirs() throws IOException, InterruptedException {
    Path rootDir = FSUtils.getRootDir(getConf());
    FileSystem fs = rootDir.getFileSystem(getConf());

    // list all tables from HDFS
    List<FileStatus> tableDirs = Lists.newArrayList();

    boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));

    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
    for (Path path : paths) {
      TableName tableName = FSUtils.getTableName(path);
      if ((!checkMetaOnly &&
          isTableIncluded(tableName)) ||
          tableName.equals(TableName.META_TABLE_NAME)) {
        tableDirs.add(fs.getFileStatus(path));
      }
    }

    // verify that version file exists
    if (!foundVersionFile) {
      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
          "Version file does not exist in root dir " + rootDir);
      if (shouldFixVersionFile()) {
        LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME);
        FSUtils.setVersion(fs, rootDir, getConf().getInt(
            HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
            HConstants.VERSION_FILE_WRITE_ATTEMPTS,
            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
      }
    }

    // Avoid multithreading at table-level because already multithreaded internally at
    // region-level. Additionally multithreading at table-level can lead to deadlock
    // if there are many tables in the cluster. Since there are a limited # of threads
    // in the executor's thread pool and if we multithread at the table-level by putting
    // WorkItemHdfsDir callables into the executor, then we will have some threads in the
    // executor tied up solely in waiting for the tables' region-level calls to complete.
    // If there are enough tables then there will be no actual threads in the pool left
    // for the region-level callables to be serviced.
    for (FileStatus tableDir : tableDirs) {
      LOG.debug("Loading region dirs from " + tableDir.getPath());
      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
      try {
        item.call();
      } catch (ExecutionException e) {
        LOG.warn("Could not completely load table dir " +
            tableDir.getPath(), e.getCause());
      }
    }
  }
  /**
   * Record the location of the hbase:meta region as found in ZooKeeper.
   */
  private boolean recordMetaRegion() throws IOException {
    RegionLocations rl = connection.locateRegion(TableName.META_TABLE_NAME,
        HConstants.EMPTY_START_ROW, false, false);
    if (rl == null) {
      errors.reportError(ERROR_CODE.NULL_META_REGION,
          "META region was not found in ZooKeeper");
      return false;
    }
    for (HRegionLocation metaLocation : rl.getRegionLocations()) {
      // Check if Meta region is valid and existing
      if (metaLocation == null) {
        errors.reportError(ERROR_CODE.NULL_META_REGION,
            "META region location is null");
        return false;
      }
      if (metaLocation.getRegionInfo() == null) {
        errors.reportError(ERROR_CODE.NULL_META_REGION,
            "META location regionInfo is null");
        return false;
      }
      if (metaLocation.getHostname() == null) {
        errors.reportError(ERROR_CODE.NULL_META_REGION,
            "META location hostName is null");
        return false;
      }
      ServerName sn = metaLocation.getServerName();
      MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());
      HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
      if (hbckInfo == null) {
        regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
      } else {
        hbckInfo.metaEntry = m;
      }
    }
    return true;
  }
  private ZKWatcher createZooKeeperWatcher() throws IOException {
    return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
      @Override
      public void abort(String why, Throwable e) {
      }

      @Override
      public boolean isAborted() {
        return false;
      }
    });
  }
  /**
   * Contacts each regionserver and fetches metadata about regions.
   * @param regionServerList - the list of region servers to connect to
   * @throws IOException if a remote or network exception occurs
   */
  void processRegionServers(Collection<ServerName> regionServerList)
      throws IOException, InterruptedException {

    List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
    List<Future<Void>> workFutures;

    // loop to contact each region server in parallel
    for (ServerName rsinfo : regionServerList) {
      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
    }

    workFutures = executor.invokeAll(workItems);

    for (int i = 0; i < workFutures.size(); i++) {
      WorkItemRegion item = workItems.get(i);
      Future<Void> f = workFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
            e.getCause());
      }
    }
  }
  /**
   * Check consistency of all regions that have been found in previous phases.
   */
  private void checkAndFixConsistency()
      throws IOException, KeeperException, InterruptedException {
    // Divide the checks in two phases. One for default/primary replicas and another
    // for the non-primary ones. Keeps code cleaner this way.

    List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
    for (java.util.Map.Entry<String, HbckInfo> e : regionInfoMap.entrySet()) {
      if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
        workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
      }
    }
    checkRegionConsistencyConcurrently(workItems);

    boolean prevHdfsCheck = shouldCheckHdfs();
    setCheckHdfs(false); // replicas don't have any hdfs data
    // Run a pass over the replicas and fix any assignment issues that exist on the currently
    // deployed/undeployed replicas.
    List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
    for (java.util.Map.Entry<String, HbckInfo> e : regionInfoMap.entrySet()) {
      if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
        replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
      }
    }
    checkRegionConsistencyConcurrently(replicaWorkItems);
    setCheckHdfs(prevHdfsCheck);

    // If some regions are skipped during the checkRegionConsistencyConcurrently() phase, we might
    // not get an accurate state of hbase if we continue. The config here allows users to tune
    // the tolerance for the number of skipped regions.
    // TODO: evaluate the consequence of continuing the hbck operation without this config.
    int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
    int numOfSkippedRegions = skippedRegions.size();
    if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
      throw new IOException(numOfSkippedRegions
          + " region(s) could not be checked or repaired. See logs for detail.");
    }

    if (shouldCheckHdfs()) {
      checkAndFixTableStates();
    }
  }
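
  // Illustrative note, not from the original source: the tolerance used above comes from
  // "hbase.hbck.skipped.regions.limit" (default 0, so any skipped region aborts the run with an
  // IOException). A sketch of raising the tolerance to, say, 5 skipped regions before running:
  //   getConf().setInt("hbase.hbck.skipped.regions.limit", 5);
  // The appropriate value is an operator judgment call; nothing in this file prescribes one.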
  /**
   * Check consistency of all regions using multiple threads concurrently.
   */
  private void checkRegionConsistencyConcurrently(
      final List<CheckRegionConsistencyWorkItem> workItems)
      throws IOException, KeeperException, InterruptedException {
    if (workItems.isEmpty()) {
      return;  // nothing to check
    }

    List<Future<Void>> workFutures = executor.invokeAll(workItems);
    for (Future<Void> f : workFutures) {
      try {
        f.get();
      } catch (ExecutionException e1) {
        LOG.warn("Could not check region consistency ", e1.getCause());
        if (e1.getCause() instanceof IOException) {
          throw (IOException) e1.getCause();
        } else if (e1.getCause() instanceof KeeperException) {
          throw (KeeperException) e1.getCause();
        } else if (e1.getCause() instanceof InterruptedException) {
          throw (InterruptedException) e1.getCause();
        } else {
          throw new IOException(e1.getCause());
        }
      }
    }
  }
  class CheckRegionConsistencyWorkItem implements Callable<Void> {
    private final String key;
    private final HbckInfo hbi;

    CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
      this.key = key;
      this.hbi = hbi;
    }

    @Override
    public synchronized Void call() throws Exception {
      try {
        checkRegionConsistency(key, hbi);
      } catch (Exception e) {
        // If the region is non-META region, skip this region and send warning/error message; if
        // the region is META region, we should not continue.
        LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString());
        if (hbi.getHdfsHRI().isMetaRegion()) {
          throw e;
        }
        LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
        addSkippedRegion(hbi);
      }
      return null;
    }
  }
  private void addSkippedRegion(final HbckInfo hbi) {
    Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
    if (skippedRegionNames == null) {
      skippedRegionNames = new HashSet<>();
    }
    skippedRegionNames.add(hbi.getRegionNameAsString());
    skippedRegions.put(hbi.getTableName(), skippedRegionNames);
  }
  /**
   * Check and fix table states, assumes full info available:
   * - empty tables loaded
   */
  private void checkAndFixTableStates() throws IOException {
    // first check dangling states
    for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
      TableName tableName = entry.getKey();
      TableState tableState = entry.getValue();
      TableInfo tableInfo = tablesInfo.get(tableName);
      if (isTableIncluded(tableName)
          && !tableName.isSystemTable()
          && tableInfo == null) {
        if (shouldFixMeta()) {
          MetaTableAccessor.deleteTableState(connection, tableName);
          TableState state = MetaTableAccessor.getTableState(connection, tableName);
          if (state != null) {
            errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
                tableName + " unable to delete dangling table state " + tableState);
          }
        } else if (!checkMetaOnly) {
          // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
          // true, tableInfo will be null as tablesInfo are not populated for all tables from hdfs
          errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
              tableName + " has dangling table state " + tableState);
        }
      }
    }
    // check that all tables have states
    for (TableName tableName : tablesInfo.keySet()) {
      if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
        if (shouldFixMeta()) {
          MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
          TableState newState = MetaTableAccessor.getTableState(connection, tableName);
          if (newState == null) {
            errors.reportError(ERROR_CODE.NO_TABLE_STATE,
                "Unable to change state for table " + tableName + " in meta ");
          }
        } else {
          errors.reportError(ERROR_CODE.NO_TABLE_STATE,
              tableName + " has no state in meta ");
        }
      }
    }
  }
  private void preCheckPermission() throws IOException, AccessDeniedException {
    if (shouldIgnorePreCheckPermission()) {
      return;
    }

    Path hbaseDir = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseDir.getFileSystem(getConf());
    UserProvider userProvider = UserProvider.instantiate(getConf());
    UserGroupInformation ugi = userProvider.getCurrent().getUGI();
    FileStatus[] files = fs.listStatus(hbaseDir);
    for (FileStatus file : files) {
      try {
        FSUtils.checkAccess(ugi, file, FsAction.WRITE);
      } catch (AccessDeniedException ace) {
        LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
            + " does not have write perms to " + file.getPath()
            + ". Please rerun hbck as hdfs user " + file.getOwner());
      }
    }
  }
  /**
   * Deletes region from meta table
   */
  private void deleteMetaRegion(HbckInfo hi) throws IOException {
    deleteMetaRegion(hi.metaEntry.getRegionName());
  }

  /**
   * Deletes region from meta table
   */
  private void deleteMetaRegion(byte[] metaKey) throws IOException {
    Delete d = new Delete(metaKey);
    meta.delete(d);
    LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
  }
  /**
   * Reset the split parent region info in meta table
   */
  private void resetSplitParent(HbckInfo hi) throws IOException {
    RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
    Delete d = new Delete(hi.metaEntry.getRegionName());
    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
    d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);

    RegionInfo hri = RegionInfoBuilder.newBuilder(hi.metaEntry)
    Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());

    meta.mutateRow(mutations);
    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
  }
  /**
   * This is a backwards-compatibility wrapper for permanently offlining a region
   * that should not be alive.  If the region server does not support the
   * "offline" method, it will use the closest unassign method instead.  This
   * will basically work until one attempts to disable or delete the affected
   * table.  The problem has to do with in-memory only master state, so
   * restarting the HMaster or failing over to another should fix this.
   */
  private void offline(byte[] regionName) throws IOException {
    String regionString = Bytes.toStringBinary(regionName);
    if (!rsSupportsOffline) {
      LOG.warn("Using unassign region " + regionString
          + " instead of using offline method, you should"
          + " restart HMaster after these repairs");
      admin.unassign(regionName, true);
      return;
    }

    // first time we assume the rs's supports #offline.
    try {
      LOG.info("Offlining region " + regionString);
      admin.offline(regionName);
    } catch (IOException ioe) {
      String notFoundMsg = "java.lang.NoSuchMethodException: " +
          "org.apache.hadoop.hbase.master.HMaster.offline([B)";
      if (ioe.getMessage().contains(notFoundMsg)) {
        LOG.warn("Using unassign region " + regionString
            + " instead of using offline method, you should"
            + " restart HMaster after these repairs");
        rsSupportsOffline = false; // in the future just use unassign
        admin.unassign(regionName, true);
      }
    }
  }
  private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
    undeployRegionsForHbi(hi);
    // undeploy replicas of the region (but only if the method is invoked for the primary)
    if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
      return;
    }
    int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
    for (int i = 1; i < numReplicas; i++) {
      if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
      RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
          hi.getPrimaryHRIForDeployedReplica(), i);
      HbckInfo h = regionInfoMap.get(hri.getEncodedName());
      undeployRegionsForHbi(h);
      // set skip checks; we undeployed it, and we don't want to evaluate this anymore
      // in consistency checks
      h.setSkipChecks(true);
    }
  }
  private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
    for (OnlineEntry rse : hi.deployedEntries) {
      LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
      try {
        HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
        offline(rse.hri.getRegionName());
      } catch (IOException ioe) {
        LOG.warn("Got exception when attempting to offline region "
            + Bytes.toString(rse.hri.getRegionName()), ioe);
      }
    }
  }
  /**
   * Attempts to undeploy a region from a region server based on information in
   * META.  Any operations that modify the file system should make sure that
   * its corresponding region is not deployed to prevent data races.
   *
   * A separate call is required to update the master in-memory region state
   * kept in the AssignementManager.  Because disable uses this state instead of
   * that found in META, we can't seem to cleanly disable/delete tables that
   * have been hbck fixed.  When used on a version of HBase that does not have
   * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
   * restart or failover may be required.
   */
  private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
    if (hi.metaEntry == null && hi.hdfsEntry == null) {
      undeployRegions(hi);
      return;
    }

    // get assignment info and hregioninfo from meta.
    Get get = new Get(hi.getRegionName());
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
    get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
    // also get the locations of the replicas to close if the primary region is being closed
    if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
      int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
      for (int i = 0; i < numReplicas; i++) {
        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
        get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
      }
    }
    Result r = meta.get(get);
    RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
    if (rl == null) {
      LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
          " since meta does not have handle to reach it");
      return;
    }
    for (HRegionLocation h : rl.getRegionLocations()) {
      ServerName serverName = h.getServerName();
      if (serverName == null) {
        errors.reportError("Unable to close region "
            + hi.getRegionNameAsString() + " because meta does not "
            + "have handle to reach it.");
        continue;
      }
      RegionInfo hri = h.getRegionInfo();
      if (hri == null) {
        LOG.warn("Unable to close region " + hi.getRegionNameAsString()
            + " because hbase:meta had invalid or missing "
            + HConstants.CATALOG_FAMILY_STR + ":"
            + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
            + " qualifier value.");
        continue;
      }
      // close the region -- close files and remove assignment
      HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
    }
  }
  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
      KeeperException, InterruptedException {
    // If we are trying to fix the errors
    if (shouldFixAssignments()) {
      undeployRegions(hbi);
      RegionInfo hri = hbi.getHdfsHRI();
      if (hri == null) {
        hri = hbi.metaEntry;
      }
      HBaseFsckRepair.fixUnassigned(admin, hri);
      HBaseFsckRepair.waitUntilAssigned(admin, hri);

      // also assign replicas if needed (do it only when this call operates on a primary replica)
      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
      int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
      for (int i = 1; i < replicationCount; i++) {
        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
        HbckInfo h = regionInfoMap.get(hri.getEncodedName());
        // set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
        // in consistency checks
        h.setSkipChecks(true);
        HBaseFsckRepair.fixUnassigned(admin, hri);
        HBaseFsckRepair.waitUntilAssigned(admin, hri);
      }
    }
  }
  /**
   * Check a single region for consistency and correct deployment.
   */
  private void checkRegionConsistency(final String key, final HbckInfo hbi)
      throws IOException, KeeperException, InterruptedException {

    if (hbi.isSkipChecks()) return;
    String descriptiveName = hbi.toString();
    boolean inMeta = hbi.metaEntry != null;
    // In case not checking HDFS, assume the region is on HDFS
    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
    boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
    boolean isDeployed = !hbi.deployedOn.isEmpty();
    boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
    boolean deploymentMatchesMeta =
        hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
        hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
    boolean splitParent =
        inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
    boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());
    boolean recentlyModified = inHdfs &&
        hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();

    // ========== First the healthy cases =============
    if (hbi.containsOnlyHdfsEdits()) {
      return;
    }
    if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
    } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
      LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
          "tabled that is not deployed");
    } else if (recentlyModified) {
      LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
    }
    // ========== Cases where the region is not in hbase:meta =============
    else if (!inMeta && !inHdfs && !isDeployed) {
      // We shouldn't have record of this region at all then!
      assert false : "Entry for region with no data";
    } else if (!inMeta && !inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
          + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
          "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      if (shouldFixAssignments()) {
        undeployRegions(hbi);
      }

    } else if (!inMeta && inHdfs && !isDeployed) {
      if (hbi.isMerged()) {
        // This region has already been merged, the remaining hdfs file will be
        // cleaned by CatalogJanitor later
        hbi.setSkipChecks(true);
        LOG.info("Region " + descriptiveName
            + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
        return;
      }
      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
          + descriptiveName + " on HDFS, but not listed in hbase:meta " +
          "or deployed on any region server");
      // restore region consistency of an adopted orphan
      if (shouldFixMeta()) {
        if (!hbi.isHdfsRegioninfoPresent()) {
          LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
              + " in table integrity repair phase if -fixHdfsOrphans was" +
          return;
        }

        RegionInfo hri = hbi.getHdfsHRI();
        TableInfo tableInfo = tablesInfo.get(hri.getTable());

        for (RegionInfo region : tableInfo.getRegionsFromMeta()) {
          if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
              && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
                hri.getEndKey()) >= 0)
              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
            if(region.isSplit() || region.isOffline()) continue;
            Path regionDir = hbi.getHdfsRegionDir();
            FileSystem fs = regionDir.getFileSystem(getConf());
            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
            for (Path familyDir : familyDirs) {
              List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
              for (Path referenceFilePath : referenceFilePaths) {
                Path parentRegionDir =
                    StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
                if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
                  LOG.warn(hri + " start and stop keys are in the range of " + region
                      + ". The region might not be cleaned up from hdfs when region " + region
                      + " split failed. Hence deleting from hdfs.");
                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
                      regionDir.getParent(), hri);
                }
              }
            }
          }
        }

        LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
        int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
                .getLiveServerMetrics().keySet(), numReplicas);

        tryAssignmentRepair(hbi, "Trying to reassign region...");
      }

    } else if (!inMeta && inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      debugLsr(hbi.getHdfsRegionDir());
      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
        // for replicas, this means that we should undeploy the region (we would have
        // gone over the primaries and fixed meta holes in first phase under
        // checkAndFixConsistency; we shouldn't get the condition !inMeta at
        // this stage unless unwanted replica)
        if (shouldFixAssignments()) {
          undeployRegionsForHbi(hbi);
        }
      }
      if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
        if (!hbi.isHdfsRegioninfoPresent()) {
          LOG.error("This should have been repaired in table integrity repair phase");
          return;
        }

        LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
        int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
        HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
            admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
                .getLiveServerMetrics().keySet(), numReplicas);
        tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
      }

    // ========== Cases where the region is in hbase:meta =============
    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
      // check whether this is an actual error, or just transient state where parent
      if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
        // check that split daughters are there
        HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
        HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
        if (infoA != null && infoB != null) {
          // we already processed or will process daughters. Move on, nothing to see here.
          hbi.setSkipChecks(true);
          return;
        }
      }

      // For Replica region, we need to do a similar check. If replica is not split successfully,
      // error is going to be reported against primary daughter region.
      if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
        LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
            + "and not deployed on any region server. This may be transient.");
        hbi.setSkipChecks(true);
        return;
      }

      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
          + descriptiveName + " is a split parent in META, in HDFS, "
          + "and not deployed on any region server. This could be transient, "
          + "consider to run the catalog janitor first!");
      if (shouldFixSplitParents()) {
        resetSplitParent(hbi);
      }
    } else if (inMeta && !inHdfs && !isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
          + descriptiveName + " found in META, but not in HDFS "
          + "or deployed on any region server.");
      if (shouldFixMeta()) {
        deleteMetaRegion(hbi);
      }
    } else if (inMeta && !inHdfs && isDeployed) {
      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
          + " found in META, but not in HDFS, " +
          "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      // We treat HDFS as ground truth.  Any information in meta is transient
      // and equivalent data can be regenerated.  So, lets unassign and remove
      // these problems from META.
      if (shouldFixAssignments()) {
        errors.print("Trying to fix unassigned region...");
        undeployRegions(hbi);
      }
      if (shouldFixMeta()) {
        // wait for it to complete
        deleteMetaRegion(hbi);
      }
    } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
          + " not deployed on any region server.");
      tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
    } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
          "Region " + descriptiveName + " should not be deployed according " +
          "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
      if (shouldFixAssignments()) {
        errors.print("Trying to close the region " + descriptiveName);
        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
      }
    } else if (inMeta && inHdfs && isMultiplyDeployed) {
      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
          + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
          + " but is multiply assigned to region servers " +
          Joiner.on(", ").join(hbi.deployedOn));
      // If we are trying to fix the errors
      if (shouldFixAssignments()) {
        errors.print("Trying to fix assignment error...");
        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
      }
    } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
          + descriptiveName + " listed in hbase:meta on region server " +
          hbi.metaEntry.regionServer + " but found on region server " +
          hbi.deployedOn.get(0));
      // If we are trying to fix the errors
      if (shouldFixAssignments()) {
        errors.print("Trying to fix assignment error...");
        HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
        HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
      }
    } else {
      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
          " is in an unforeseen state:" +
          " inMeta=" + inMeta +
          " inHdfs=" + inHdfs +
          " isDeployed=" + isDeployed +
          " isMultiplyDeployed=" + isMultiplyDeployed +
          " deploymentMatchesMeta=" + deploymentMatchesMeta +
          " shouldBeDeployed=" + shouldBeDeployed);
    }
  }
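
  // Derived summary of the branches above (no new behaviour; labels are the ERROR_CODEs reported):
  //   inMeta  inHdfs  deployed              outcome
  //   yes     yes     matches meta          healthy, nothing to do
  //   no      no      yes                   NOT_IN_META_HDFS, optionally undeploy
  //   no      yes     no                    NOT_IN_META_OR_DEPLOYED, optionally patch meta + assign
  //   no      yes     yes                   NOT_IN_META, optionally patch meta / undeploy replica
  //   yes     no      no                    NOT_IN_HDFS_OR_DEPLOYED, optionally delete from meta
  //   yes     no      yes                   NOT_IN_HDFS, optionally unassign + delete from meta
  //   yes     yes     no (should be)        NOT_DEPLOYED or LINGERING_SPLIT_PARENT, optionally assign
  //   yes     yes     yes (but disabled)    SHOULD_NOT_BE_DEPLOYED, optionally close
  //   yes     yes     multiply / mismatch   MULTI_DEPLOYED / SERVER_DOES_NOT_MATCH_META, optionally fix
  //   anything else                         UNKNOWN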
  /**
   * Checks tables integrity. Goes over all regions and scans the tables.
   * Collects all the pieces for each table and checks if there are missing,
   * repeated or overlapping ones.
   * @throws IOException
   */
  SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
    tablesInfo = new TreeMap<>();
    LOG.debug("There are " + regionInfoMap.size() + " region info entries");
    for (HbckInfo hbi : regionInfoMap.values()) {
      // Check only valid, working regions
      if (hbi.metaEntry == null) {
        // this assumes that consistency check has run loadMetaEntry
        Path p = hbi.getHdfsRegionDir();
        if (p == null) {
          errors.report("No regioninfo in Meta or HDFS. " + hbi);
        }
        continue;
      }
      if (hbi.metaEntry.regionServer == null) {
        errors.detail("Skipping region because no region server: " + hbi);
        continue;
      }
      if (hbi.metaEntry.isOffline()) {
        errors.detail("Skipping region because it is offline: " + hbi);
        continue;
      }
      if (hbi.containsOnlyHdfsEdits()) {
        errors.detail("Skipping region because it only contains edits" + hbi);
        continue;
      }

      // Missing regionDir or over-deployment is checked elsewhere. Include
      // these cases in modTInfo, so we can evaluate those regions as part of
      // the region chain in META
      //if (hbi.foundRegionDir == null) continue;
      //if (hbi.deployedOn.size() != 1) continue;
      if (hbi.deployedOn.isEmpty()) continue;

      // We should be safe here
      TableName tableName = hbi.metaEntry.getTable();
      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        modTInfo = new TableInfo(tableName);
      }
      for (ServerName server : hbi.deployedOn) {
        modTInfo.addServer(server);
      }

      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }

      tablesInfo.put(tableName, modTInfo);
    }

    loadTableInfosForTablesWithNoRegion();

    for (TableInfo tInfo : tablesInfo.values()) {
      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
      if (!tInfo.checkRegionChain(handler)) {
        errors.report("Found inconsistency in table " + tInfo.getName());
      }
    }
    return tablesInfo;
  }
  /** Loads table info's for tables that may not have been included, since there are no
   * regions reported for the table, but table dir is there in hdfs
   */
  private void loadTableInfosForTablesWithNoRegion() throws IOException {
    Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
    for (TableDescriptor htd : allTables.values()) {
      if (checkMetaOnly && !htd.isMetaTable()) {
        continue;
      }

      TableName tableName = htd.getTableName();
      if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
        TableInfo tableInfo = new TableInfo(tableName);
        tableInfo.htds.add(htd);
        tablesInfo.put(htd.getTableName(), tableInfo);
      }
    }
  }
  /**
   * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
   * @return number of file move fixes done to merge regions.
   */
  public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
    String thread = Thread.currentThread().getName();
    LOG.debug("[" + thread + "] Contained region dir after close and pause");
    debugLsr(contained.getHdfsRegionDir());

    // rename the contained into the container.
    FileSystem fs = targetRegionDir.getFileSystem(getConf());
    FileStatus[] dirs = null;
    try {
      dirs = fs.listStatus(contained.getHdfsRegionDir());
    } catch (FileNotFoundException fnfe) {
      // region we are attempting to merge in is not present!  Since this is a merge, there is
      // no harm skipping this region if it does not exist.
      if (!fs.exists(contained.getHdfsRegionDir())) {
        LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
            + " is missing. Assuming already sidelined or moved.");
      } else {
        sidelineRegionDir(fs, contained);
      }
    }

    if (!fs.exists(contained.getHdfsRegionDir())) {
      LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
          + " already sidelined.");
    } else {
      sidelineRegionDir(fs, contained);
    }

    for (FileStatus cf : dirs) {
      Path src = cf.getPath();
      Path dst = new Path(targetRegionDir, src.getName());

      if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
        // do not copy the old .regioninfo file.
        continue;
      }

      if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
        // do not copy the .oldlogs files
        continue;
      }

      LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
      // FileSystem.rename is inconsistent with directories -- if the
      // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
      // it moves the src into the dst dir resulting in (foo/a/b).  If
      // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
      for (FileStatus hfile : fs.listStatus(src)) {
        boolean success = fs.rename(hfile.getPath(), dst);
      }
      LOG.debug("[" + thread + "] Sideline directory contents:");
      debugLsr(targetRegionDir);
    }

    sidelineRegionDir(fs, contained);
    LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
    debugLsr(contained.getHdfsRegionDir());
  }
  static class WorkItemOverlapMerge implements Callable<Void> {
    private TableIntegrityErrorHandler handler;
    Collection<HbckInfo> overlapgroup;

    WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
      this.handler = handler;
      this.overlapgroup = overlapgroup;
    }

    @Override
    public Void call() throws Exception {
      handler.handleOverlapGroup(overlapgroup);
      return null;
    }
  }
  /**
   * Maintain information about a particular table.
   */
  public class TableInfo {
    TableName tableName;
    TreeSet<ServerName> deployedOn;

    // backwards regions
    final List<HbckInfo> backwards = new ArrayList<>();

    // sidelined big overlapped regions
    final Map<Path, HbckInfo> sidelinedRegions = new HashMap<>();

    // region split calculator
    final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<>(cmp);

    // Histogram of different TableDescriptors found.  Ideally there is only one!
    final Set<TableDescriptor> htds = new HashSet<>();

    // key = start split, values = set of splits in problem group
    final Multimap<byte[], HbckInfo> overlapGroups =
        TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);

    // list of regions derived from meta entries.
    private ImmutableList<RegionInfo> regionsFromMeta = null;

    TableInfo(TableName name) {
      this.tableName = name;
      deployedOn = new TreeSet<>();
    }

    /**
     * @return descriptor common to all regions.  null if there are none or multiple!
     */
    private TableDescriptor getHTD() {
      if (htds.size() == 1) {
        return (TableDescriptor) htds.toArray()[0];
      } else {
        LOG.error("None/Multiple table descriptors found for table '"
            + tableName + "' regions: " + htds);
      }
      return null;
    }

    public void addRegionInfo(HbckInfo hir) {
      if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
        // end key is absolute end key, just add it.
        // ignore replicas other than primary for these checks
        if (hir.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
        return;
      }

      // if not the absolute end key, check for cycle
      if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
        errors.reportError(
            ERROR_CODE.REGION_CYCLE,
            String.format("The endkey for this region comes before the "
                + "startkey, startkey=%s, endkey=%s",
                Bytes.toStringBinary(hir.getStartKey()),
                Bytes.toStringBinary(hir.getEndKey())), this, hir);
      }

      // main case, add to split calculator
      // ignore replicas other than primary for these checks
      if (hir.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
    }

    public void addServer(ServerName server) {
      this.deployedOn.add(server);
    }

    public TableName getName() {
      return tableName;
    }

    public int getNumRegions() {
      return sc.getStarts().size() + backwards.size();
    }

    public synchronized ImmutableList<RegionInfo> getRegionsFromMeta() {
      // lazy loaded, synchronized to ensure a single load
      if (regionsFromMeta == null) {
        List<RegionInfo> regions = new ArrayList<>();
        for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
          if (tableName.equals(h.getTableName())) {
            if (h.metaEntry != null) {
              regions.add(h.metaEntry);
            }
          }
        }
        regionsFromMeta = Ordering.from(RegionInfo.COMPARATOR).immutableSortedCopy(regions);
      }

      return regionsFromMeta;
    }
    private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
      ErrorReporter errors;

      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
        this.errors = errors;
      }

      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException {
        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
            "First region should start with an empty key. You need to "
            + " create a new region and regioninfo in HDFS to plug the hole.",
            getTableInfo(), hi);
      }

      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
            "Last region should end with an empty key. You need to "
                + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
      }

      public void handleDegenerateRegion(HbckInfo hi) throws IOException {
        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
            "Region has the same start and end key.", getTableInfo(), hi);
      }

      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException {
        byte[] key = r1.getStartKey();
        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
            "Multiple regions have the same startkey: "
            + Bytes.toStringBinary(key), getTableInfo(), r1);
        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
            "Multiple regions have the same startkey: "
            + Bytes.toStringBinary(key), getTableInfo(), r2);
      }

      public void handleSplit(HbckInfo r1, HbckInfo r2) throws IOException {
        byte[] key = r1.getStartKey();
        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
            "Multiple regions have the same regionID: "
            + Bytes.toStringBinary(key), getTableInfo(), r1);
        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
            "Multiple regions have the same regionID: "
            + Bytes.toStringBinary(key), getTableInfo(), r2);
      }

      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException {
        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
            "There is an overlap in the region chain.",
            getTableInfo(), hi1, hi2);
      }

      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException {
        errors.reportError(
            ERROR_CODE.HOLE_IN_REGION_CHAIN,
            "There is a hole in the region chain between "
                + Bytes.toStringBinary(holeStart) + " and "
                + Bytes.toStringBinary(holeStop)
                + ". You need to create a new .regioninfo and region "
                + "dir in hdfs to plug the hole.");
      }
    }
3004 * This handler fixes integrity errors from hdfs information. There are
3005 * basically three classes of integrity problems 1) holes, 2) overlaps, and
3006 * 3) invalid regions.
3008 * This class overrides methods that fix holes and the overlap group case.
3009 * Individual cases of particular overlaps are handled by the general
3010 * overlap group merge repair case.
3012 * If hbase is online, this forces regions offline before doing merge
3015 private class HDFSIntegrityFixer
extends IntegrityFixSuggester
{
3018 boolean fixOverlaps
= true;
3020 HDFSIntegrityFixer(TableInfo ti
, ErrorReporter errors
, Configuration conf
,
3021 boolean fixHoles
, boolean fixOverlaps
) {
3024 this.fixOverlaps
= fixOverlaps
;
3025 // TODO properly use fixHoles
3029 * This is a special case hole -- when the first region of a table is
3030 * missing from META, HBase doesn't acknowledge the existance of the
3034 public void handleRegionStartKeyNotEmpty(HbckInfo next
) throws IOException
{
3035 errors
.reportError(ERROR_CODE
.FIRST_REGION_STARTKEY_NOT_EMPTY
,
3036 "First region should start with an empty key. Creating a new " +
3037 "region and regioninfo in HDFS to plug the hole.",
3038 getTableInfo(), next
);
3039 TableDescriptor htd
= getTableInfo().getHTD();
3040 // from special EMPTY_START_ROW to next region's startKey
3041 RegionInfo newRegion
= RegionInfoBuilder
.newBuilder(htd
.getTableName())
3042 .setStartKey(HConstants
.EMPTY_START_ROW
)
3043 .setEndKey(next
.getStartKey())
3047 HRegion region
= HBaseFsckRepair
.createHDFSRegionDir(conf
, newRegion
, htd
);
3048 LOG
.info("Table region start key was not empty. Created new empty region: "
3049 + newRegion
+ " " +region
);
    public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
      errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
          "Last region should end with an empty key. Creating a new "
              + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
      TableDescriptor htd = getTableInfo().getHTD();
      // from curEndKey to EMPTY_START_ROW
      RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
          .setStartKey(curEndKey)
          .setEndKey(HConstants.EMPTY_START_ROW)
          .build();

      HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
      LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
          + " " + region);
    }
    /**
     * There is a hole in the hdfs regions that violates the table integrity
     * rules. Create a new empty region that patches the hole.
     */
    public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
      errors.reportError(
          ERROR_CODE.HOLE_IN_REGION_CHAIN,
          "There is a hole in the region chain between "
              + Bytes.toStringBinary(holeStartKey) + " and "
              + Bytes.toStringBinary(holeStopKey)
              + ". Creating a new regioninfo and region "
              + "dir in hdfs to plug the hole.");
      TableDescriptor htd = getTableInfo().getHTD();
      RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
          .setStartKey(holeStartKey)
          .setEndKey(holeStopKey)
          .build();
      HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
      LOG.info("Plugged hole by creating new empty region: " + newRegion + " " + region);
    }
    /**
     * This takes a set of overlapping regions and merges them into a single
     * region. This covers cases like degenerate regions, shared start key,
     * general overlaps, duplicate ranges, and partial overlapping regions.
     *
     * Cases:
     * - Clean regions that overlap
     * - Only .oldlogs regions (can't find start/stop range, or figure out)
     *
     * This is basically threadsafe, except for the fixer increment in mergeOverlaps.
     */
    public void handleOverlapGroup(Collection<HbckInfo> overlap)
        throws IOException {
      Preconditions.checkNotNull(overlap);
      Preconditions.checkArgument(overlap.size() > 0);

      if (!this.fixOverlaps) {
        LOG.warn("Not attempting to repair overlaps.");
        return;
      }

      if (overlap.size() > maxMerge) {
        LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
            "regions which is greater than " + maxMerge + ", the max number of regions to merge");
        if (sidelineBigOverlaps) {
          // we only sideline big overlapped groups that exceed the max number of regions to merge
          sidelineBigOverlaps(overlap);
        }
        return;
      }

      if (shouldRemoveParents()) {
        removeParentsAndFixSplits(overlap);
      }
      mergeOverlaps(overlap);
    }
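    // removeParentsAndFixSplits (below) covers the split edge case inside an overlap group:
    // if one region's HDFS range spans the whole group it is treated as a lingering split
    // parent and is closed, offlined, removed from hbase:meta and sidelined, so that only
    // the two daughter regions remain for the regular merge.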
    void removeParentsAndFixSplits(Collection<HbckInfo> overlap) throws IOException {
      Pair<byte[], byte[]> range = null;
      HbckInfo parent = null;
      HbckInfo daughterA = null;
      HbckInfo daughterB = null;
      Collection<HbckInfo> daughters = new ArrayList<HbckInfo>(overlap);

      String thread = Thread.currentThread().getName();
      LOG.info("== [" + thread + "] Attempting fix splits in overlap state.");

      // we only can handle a single split per group at the time
      if (overlap.size() > 3) {
        LOG.info("Too many overlaps were found on this group, falling back to regular merge.");
        return;
      }

      for (HbckInfo hi : overlap) {
        if (range == null) {
          range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
        } else {
          if (RegionSplitCalculator.BYTES_COMPARATOR
              .compare(hi.getStartKey(), range.getFirst()) < 0) {
            range.setFirst(hi.getStartKey());
          }
          if (RegionSplitCalculator.BYTES_COMPARATOR
              .compare(hi.getEndKey(), range.getSecond()) > 0) {
            range.setSecond(hi.getEndKey());
          }
        }
      }

      LOG.info("This group range is [" + Bytes.toStringBinary(range.getFirst()) + ", "
          + Bytes.toStringBinary(range.getSecond()) + "]");

      // attempt to find a possible parent for the edge case of a split
      for (HbckInfo hi : overlap) {
        if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0
            && Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
          LOG.info("This is a parent for this group: " + hi.toString());
          parent = hi;
        }
      }

      // Remove parent regions from daughters collection
      if (parent != null) {
        daughters.remove(parent);
      }

      // Let's verify that daughters share the regionID at split time and they
      // were created after the parent
      for (HbckInfo hi : daughters) {
        if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0) {
          if (parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
            daughterA = hi;
          }
        }
        if (Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
          if (parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
            daughterB = hi;
          }
        }
      }

      // daughters must share the same regionID and we should have a parent too
      if (daughterA.getHdfsHRI().getRegionId() != daughterB.getHdfsHRI().getRegionId()
          || parent == null) {
        return;
      }

      FileSystem fs = FileSystem.get(conf);
      LOG.info("Found parent: " + parent.getRegionNameAsString());
      LOG.info("Found potential daughter a: " + daughterA.getRegionNameAsString());
      LOG.info("Found potential daughter b: " + daughterB.getRegionNameAsString());
      LOG.info("Trying to fix parent in overlap by removing the parent.");
      try {
        closeRegion(parent);
      } catch (IOException ioe) {
        LOG.warn("Parent region could not be closed, continuing with regular merge...", ioe);
        return;
      } catch (InterruptedException ie) {
        LOG.warn("Parent region could not be closed, continuing with regular merge...", ie);
        return;
      }

      try {
        offline(parent.getRegionName());
      } catch (IOException ioe) {
        LOG.warn("Unable to offline parent region: " + parent.getRegionNameAsString()
            + ". Just continuing with regular merge... ", ioe);
        return;
      }

      try {
        HBaseFsckRepair.removeParentInMeta(conf, parent.getHdfsHRI());
      } catch (IOException ioe) {
        LOG.warn("Unable to remove parent region in META: " + parent.getRegionNameAsString()
            + ". Just continuing with regular merge... ", ioe);
        return;
      }

      sidelineRegionDir(fs, parent);
      LOG.info("[" + thread + "] Sidelined parent region dir " + parent.getHdfsRegionDir()
          + " into " + getSidelineDir());
      debugLsr(parent.getHdfsRegionDir());

      // Make sure we don't have the parents and daughters around
      overlap.remove(parent);
      overlap.remove(daughterA);
      overlap.remove(daughterB);

      LOG.info("Done fixing split.");
    }
    void mergeOverlaps(Collection<HbckInfo> overlap)
        throws IOException {
      String thread = Thread.currentThread().getName();
      LOG.info("== [" + thread + "] Merging regions into one region: "
          + Joiner.on(",").join(overlap));
      // get the min / max range and close all concerned regions
      Pair<byte[], byte[]> range = null;
      for (HbckInfo hi : overlap) {
        if (range == null) {
          range = new Pair<>(hi.getStartKey(), hi.getEndKey());
        } else {
          if (RegionSplitCalculator.BYTES_COMPARATOR
              .compare(hi.getStartKey(), range.getFirst()) < 0) {
            range.setFirst(hi.getStartKey());
          }
          if (RegionSplitCalculator.BYTES_COMPARATOR
              .compare(hi.getEndKey(), range.getSecond()) > 0) {
            range.setSecond(hi.getEndKey());
          }
        }
        // need to close files so delete can happen.
        LOG.debug("[" + thread + "] Closing region before moving data around: " + hi);
        LOG.debug("[" + thread + "] Contained region dir before close");
        debugLsr(hi.getHdfsRegionDir());
        try {
          LOG.info("[" + thread + "] Closing region: " + hi);
          closeRegion(hi);
        } catch (IOException ioe) {
          LOG.warn("[" + thread + "] Was unable to close region " + hi
              + ". Just continuing... ", ioe);
        } catch (InterruptedException e) {
          LOG.warn("[" + thread + "] Was unable to close region " + hi
              + ". Just continuing... ", e);
        }

        try {
          LOG.info("[" + thread + "] Offlining region: " + hi);
          offline(hi.getRegionName());
        } catch (IOException ioe) {
          LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
              + ". Just continuing... ", ioe);
        }
      }

      // create new empty container region.
      TableDescriptor htd = getTableInfo().getHTD();
      // from start key to end Key
      RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
          .setStartKey(range.getFirst())
          .setEndKey(range.getSecond())
          .build();
      HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
      LOG.info("[" + thread + "] Created new empty container region: " +
          newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
      debugLsr(region.getRegionFileSystem().getRegionDir());

      // all target regions are closed, should be able to safely cleanup.
      boolean didFix = false;
      Path target = region.getRegionFileSystem().getRegionDir();
      for (HbckInfo contained : overlap) {
        LOG.info("[" + thread + "] Merging " + contained + " into " + target);
        int merges = mergeRegionDirs(target, contained);
        if (merges > 0) {
          didFix = true;
        }
      }
      if (didFix) {
        fixes++;
      }
    }
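    // Worked example for mergeOverlaps: given overlapping regions [a,c), [b,d) and [c,e),
    // the computed min/max range is [a,e); each region is closed and offlined, a new empty
    // container region [a,e) is created in HDFS, and the contained region dirs are merged
    // into it.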
    /**
     * Sideline some regions in a big overlap group so that it
     * will have fewer regions, and it is easier to merge them later on.
     *
     * @param bigOverlap the overlapped group with regions more than maxMerge
     * @throws IOException
     */
    void sidelineBigOverlaps(
        Collection<HbckInfo> bigOverlap) throws IOException {
      int overlapsToSideline = bigOverlap.size() - maxMerge;
      if (overlapsToSideline > maxOverlapsToSideline) {
        overlapsToSideline = maxOverlapsToSideline;
      }
      List<HbckInfo> regionsToSideline =
          RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
      FileSystem fs = FileSystem.get(conf);
      for (HbckInfo regionToSideline : regionsToSideline) {
        try {
          LOG.info("Closing region: " + regionToSideline);
          closeRegion(regionToSideline);
        } catch (IOException ioe) {
          LOG.warn("Was unable to close region " + regionToSideline
              + ". Just continuing... ", ioe);
        } catch (InterruptedException e) {
          LOG.warn("Was unable to close region " + regionToSideline
              + ". Just continuing... ", e);
        }

        try {
          LOG.info("Offlining region: " + regionToSideline);
          offline(regionToSideline.getRegionName());
        } catch (IOException ioe) {
          LOG.warn("Unable to offline region from master: " + regionToSideline
              + ". Just continuing... ", ioe);
        }

        LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
        Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
        if (sidelineRegionDir != null) {
          sidelinedRegions.put(sidelineRegionDir, regionToSideline);
          LOG.info("After sidelined big overlapped region: "
              + regionToSideline.getRegionNameAsString()
              + " to " + sidelineRegionDir.toString());
        }
      }
    }
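    // Example: with maxMerge = 5 and maxOverlapsToSideline = 2, a group of 9 overlapping
    // regions sidelines min(9 - 5, 2) = 2 of the widest ranges, leaving 7 regions for a
    // later merge attempt; sidelined region dirs must be bulk loaded back afterwards.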
  }

  /**
   * Check the region chain (from META) of this table. We are looking for
   * holes, overlaps, and cycles.
   * @return false if there are errors
   * @throws IOException
   */
  public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
    // When the table is disabled there is no need to check the region chain. If some of its
    // regions happen to be deployed, the code below might report issues such as missing start
    // or end regions or a hole in the chain, and might try to fix them, which is unwanted.
    if (isTableDisabled(this.tableName)) {
      return true;
    }

    int originalErrorsCount = errors.getErrorList().size();
    Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
    SortedSet<byte[]> splits = sc.getSplits();

    byte[] prevKey = null;
    byte[] problemKey = null;

    if (splits.isEmpty()) {
      // no region for this table
      handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
    }

    for (byte[] key : splits) {
      Collection<HbckInfo> ranges = regions.get(key);
      if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
        for (HbckInfo rng : ranges) {
          handler.handleRegionStartKeyNotEmpty(rng);
        }
      }

      // check for degenerate ranges
      for (HbckInfo rng : ranges) {
        // special endkey case converts '' to null
        byte[] endKey = rng.getEndKey();
        endKey = (endKey.length == 0) ? null : endKey;
        if (Bytes.equals(rng.getStartKey(), endKey)) {
          handler.handleDegenerateRegion(rng);
        }
      }

      if (ranges.size() == 1) {
        // this split key is ok -- no overlap, not a hole.
        if (problemKey != null) {
          LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
        }
        problemKey = null; // fell through, no more problem.
      } else if (ranges.size() > 1) {
        // set the new problem key group name, if already have problem key, just
        // keep using it.
        if (problemKey == null) {
          // only for overlap regions.
          LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
          problemKey = key;
        }
        overlapGroups.putAll(problemKey, ranges);

        ArrayList<HbckInfo> subRange = new ArrayList<>(ranges);
        // this is dumb and n^2 but this shouldn't happen often
        for (HbckInfo r1 : ranges) {
          if (r1.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) continue;
          subRange.remove(r1);
          for (HbckInfo r2 : subRange) {
            if (r2.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) continue;
            // general case of same start key
            if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey()) == 0) {
              handler.handleDuplicateStartKeys(r1, r2);
            } else if (Bytes.compareTo(r1.getEndKey(), r2.getStartKey()) == 0 &&
                r1.getHdfsHRI().getRegionId() == r2.getHdfsHRI().getRegionId()) {
              LOG.info("this is a split, log to splits");
              handler.handleSplit(r1, r2);
            } else {
              handler.handleOverlapInRegionChain(r1, r2);
            }
          }
        }
      } else if (ranges.isEmpty()) {
        if (problemKey != null) {
          LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
        }
        problemKey = null;

        byte[] holeStopKey = sc.getSplits().higher(key);
        // if higher key is null we reached the top.
        if (holeStopKey != null) {
          // hole
          handler.handleHoleInRegionChain(key, holeStopKey);
        }
      }
      prevKey = key;
    }

    // When the last region of a table is proper and having an empty end key, 'prevKey'
    // will be null.
    if (prevKey != null) {
      handler.handleRegionEndKeyNotEmpty(prevKey);
    }

    // TODO fold this into the TableIntegrityHandler
    if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
      boolean ok = handleOverlapsParallel(handler, prevKey);
      if (!ok) {
        return false;
      }
    } else {
      for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
        handler.handleOverlapGroup(overlap);
      }
    }

    if (details) {
      // do full region split map dump
      errors.print("---- Table '" + this.tableName
          + "': region split map");
      dump(splits, regions);
      errors.print("---- Table '" + this.tableName
          + "': overlap groups");
      dumpOverlapProblems(overlapGroups);
      errors.print("There are " + overlapGroups.keySet().size()
          + " overlap groups with " + overlapGroups.size()
          + " overlapping regions");
    }
    if (!sidelinedRegions.isEmpty()) {
      LOG.warn("Sidelined big overlapped regions, please bulk load them!");
      errors.print("---- Table '" + this.tableName
          + "': sidelined big overlapped regions");
      dumpSidelinedRegions(sidelinedRegions);
    }
    return errors.getErrorList().size() == originalErrorsCount;
  }
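  // Example of the chain check above: for splits {"", "b", "d"} a healthy table has exactly
  // one region per split point ("" -> ["", "b"), "b" -> ["b", "d"), "d" -> ["d", "")).
  // Zero regions at a split point is reported as a hole; more than one is reported as
  // duplicate start keys, a split, or an overlap, depending on the keys and region ids.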
  private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
      throws IOException {
    // we parallelize overlap handler for the case we have lots of groups to fix. We can
    // safely assume each group is independent.
    List<WorkItemOverlapMerge> merges = new ArrayList<>(overlapGroups.size());
    List<Future<Void>> rets;
    for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
      merges.add(new WorkItemOverlapMerge(overlap, handler));
    }
    try {
      rets = executor.invokeAll(merges);
    } catch (InterruptedException e) {
      LOG.error("Overlap merges were interrupted", e);
      return false;
    }
    for (int i = 0; i < merges.size(); i++) {
      WorkItemOverlapMerge work = merges.get(i);
      Future<Void> f = rets.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Failed to merge overlap group" + work, e.getCause());
      } catch (InterruptedException e) {
        LOG.error("Waiting for overlap merges was interrupted", e);
        return false;
      }
    }
    return true;
  }
  /**
   * This dumps data in a visually reasonable way for visual debugging.
   */
  void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
    // we display this way because the last end key should be displayed as well.
    StringBuilder sb = new StringBuilder();
    for (byte[] k : splits) {
      sb.setLength(0); // clear out existing buffer, if any.
      sb.append(Bytes.toStringBinary(k) + ":\t");
      for (HbckInfo r : regions.get(k)) {
        sb.append("[ " + r.toString() + ", "
            + Bytes.toStringBinary(r.getEndKey()) + "]\t");
      }
      errors.print(sb.toString());
    }
  }
  public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
    // we display this way because the last end key should be displayed as well.
    for (byte[] k : regions.keySet()) {
      errors.print(Bytes.toStringBinary(k) + ":");
      for (HbckInfo r : regions.get(k)) {
        errors.print("[ " + r.toString() + ", "
            + Bytes.toStringBinary(r.getEndKey()) + "]");
      }
      errors.print("----");
    }
  }
  public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
    for (Map.Entry<Path, HbckInfo> entry : regions.entrySet()) {
      TableName tableName = entry.getValue().getTableName();
      Path path = entry.getKey();
      errors.print("This sidelined region dir should be bulk loaded: "
          + path.toString());
      errors.print("Bulk load command looks like: "
          + "hbase org.apache.hadoop.hbase.tool.LoadIncrementalHFiles "
          + path.toUri().getPath() + " " + tableName);
    }
  }
  public Multimap<byte[], HbckInfo> getOverlapGroups(
      TableName table) {
    TableInfo ti = tablesInfo.get(table);
    return ti.overlapGroups;
  }
  /**
   * Return a list of user-space table names whose metadata have not been
   * modified in the last few milliseconds specified by timelag:
   * if none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
   * SPLITA_QUALIFIER or SPLITB_QUALIFIER columns has changed in the last
   * milliseconds specified by timelag, then the table is a candidate to be returned.
   * @return tables that have not been modified recently
   * @throws IOException if an error is encountered
   */
  TableDescriptor[] getTables(AtomicInteger numSkipped) {
    List<TableName> tableNames = new ArrayList<>();
    long now = EnvironmentEdgeManager.currentTime();

    for (HbckInfo hbi : regionInfoMap.values()) {
      MetaEntry info = hbi.metaEntry;

      // if the start key is zero, then we have found the first region of a table.
      // pick only those tables that were not modified in the last few milliseconds.
      if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
        if (info.modTime + timelag < now) {
          tableNames.add(info.getTable());
        } else {
          numSkipped.incrementAndGet(); // one more in-flux table
        }
      }
    }
    return getTableDescriptors(tableNames);
  }
  TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
    LOG.info("getTableDescriptors == tableNames => " + tableNames);
    try (Connection conn = ConnectionFactory.createConnection(getConf());
        Admin admin = conn.getAdmin()) {
      List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
      return tds.toArray(new TableDescriptor[tds.size()]);
    } catch (IOException e) {
      LOG.debug("Exception getting table descriptors", e);
    }
    return new TableDescriptor[0];
  }
  /**
   * Gets the entry in regionInfo corresponding to the given encoded
   * region name. If the region has not been seen yet, a new entry is added
   * and returned.
   */
  private synchronized HbckInfo getOrCreateInfo(String name) {
    HbckInfo hbi = regionInfoMap.get(name);
    if (hbi == null) {
      hbi = new HbckInfo(null);
      regionInfoMap.put(name, hbi);
    }
    return hbi;
  }
  private void checkAndFixReplication() throws ReplicationException {
    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, errors);
    checker.checkUnDeletedQueues();

    if (checker.hasUnDeletedQueues() && this.fixReplication) {
      checker.fixUnDeletedQueues();
      setShouldRerun();
    }
  }
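  // Replication queues left behind by removed peers are only reported by default; when hbck
  // is run with -fixReplication the undeleted queues are cleaned up as well.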
3648 * Check values in regionInfo for hbase:meta
3649 * Check if zero or more than one regions with hbase:meta are found.
3650 * If there are inconsistencies (i.e. zero or more than one regions
3651 * pretend to be holding the hbase:meta) try to fix that and report an error.
3652 * @throws IOException from HBaseFsckRepair functions
3653 * @throws KeeperException
3654 * @throws InterruptedException
3656 boolean checkMetaRegion() throws IOException
, KeeperException
, InterruptedException
{
3657 Map
<Integer
, HbckInfo
> metaRegions
= new HashMap
<>();
3658 for (HbckInfo value
: regionInfoMap
.values()) {
3659 if (value
.metaEntry
!= null && value
.metaEntry
.isMetaRegion()) {
3660 metaRegions
.put(value
.getReplicaId(), value
);
3663 int metaReplication
= admin
.getTableDescriptor(TableName
.META_TABLE_NAME
)
3664 .getRegionReplication();
3665 boolean noProblem
= true;
3666 // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas
3667 // Check the deployed servers. It should be exactly one server for each replica.
3668 for (int i
= 0; i
< metaReplication
; i
++) {
3669 HbckInfo metaHbckInfo
= metaRegions
.remove(i
);
3670 List
<ServerName
> servers
= new ArrayList
<>();
3671 if (metaHbckInfo
!= null) {
3672 servers
= metaHbckInfo
.deployedOn
;
3674 if (servers
.size() != 1) {
3676 if (servers
.isEmpty()) {
3677 assignMetaReplica(i
);
3678 } else if (servers
.size() > 1) {
3680 .reportError(ERROR_CODE
.MULTI_META_REGION
, "hbase:meta, replicaId " +
3681 metaHbckInfo
.getReplicaId() + " is found on more than one region.");
3682 if (shouldFixAssignments()) {
3683 errors
.print("Trying to fix a problem with hbase:meta, replicaId " +
3684 metaHbckInfo
.getReplicaId() +"..");
3686 // try fix it (treat is a dupe assignment)
3687 HBaseFsckRepair
.fixMultiAssignment(connection
, metaHbckInfo
.metaEntry
, servers
);
3692 // unassign whatever is remaining in metaRegions. They are excess replicas.
3693 for (Map
.Entry
<Integer
, HbckInfo
> entry
: metaRegions
.entrySet()) {
3695 errors
.reportError(ERROR_CODE
.SHOULD_NOT_BE_DEPLOYED
,
3696 "hbase:meta replicas are deployed in excess. Configured " + metaReplication
+
3697 ", deployed " + metaRegions
.size());
3698 if (shouldFixAssignments()) {
3699 errors
.print("Trying to undeploy excess replica, replicaId: " + entry
.getKey() +
3700 " of hbase:meta..");
3702 unassignMetaReplica(entry
.getValue());
3705 // if noProblem is false, rerun hbck with hopefully fixed META
3706 // if noProblem is true, no errors, so continue normally
3710 private void unassignMetaReplica(HbckInfo hi
) throws IOException
, InterruptedException
,
3712 undeployRegions(hi
);
3713 ZKUtil
.deleteNode(zkw
, zkw
.getZNodePaths().getZNodeForReplica(hi
.metaEntry
.getReplicaId()));
3716 private void assignMetaReplica(int replicaId
)
3717 throws IOException
, KeeperException
, InterruptedException
{
3718 errors
.reportError(ERROR_CODE
.NO_META_REGION
, "hbase:meta, replicaId " +
3719 replicaId
+" is not found on any region.");
3720 if (shouldFixAssignments()) {
3721 errors
.print("Trying to fix a problem with hbase:meta..");
3723 // try to fix it (treat it as unassigned region)
3724 RegionInfo h
= RegionReplicaUtil
.getRegionInfoForReplica(
3725 RegionInfoBuilder
.FIRST_META_REGIONINFO
, replicaId
);
3726 HBaseFsckRepair
.fixUnassigned(admin
, h
);
3727 HBaseFsckRepair
.waitUntilAssigned(admin
, h
);
3732 * Scan hbase:meta, adding all regions found to the regionInfo map.
3733 * @throws IOException if an error is encountered
3735 boolean loadMetaEntries() throws IOException
{
3736 MetaTableAccessor
.Visitor visitor
= new MetaTableAccessor
.Visitor() {
3737 int countRecord
= 1;
3739 // comparator to sort KeyValues with latest modtime
3740 final Comparator
<Cell
> comp
= new Comparator
<Cell
>() {
3742 public int compare(Cell k1
, Cell k2
) {
3743 return Long
.compare(k1
.getTimestamp(), k2
.getTimestamp());
3748 public boolean visit(Result result
) throws IOException
{
3751 // record the latest modification of this META record
3752 long ts
= Collections
.max(result
.listCells(), comp
).getTimestamp();
3753 RegionLocations rl
= MetaTableAccessor
.getRegionLocations(result
);
3755 emptyRegionInfoQualifiers
.add(result
);
3756 errors
.reportError(ERROR_CODE
.EMPTY_META_CELL
,
3757 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3760 ServerName sn
= null;
3761 if (rl
.getRegionLocation(RegionInfo
.DEFAULT_REPLICA_ID
) == null ||
3762 rl
.getRegionLocation(RegionInfo
.DEFAULT_REPLICA_ID
).getRegionInfo() == null) {
3763 emptyRegionInfoQualifiers
.add(result
);
3764 errors
.reportError(ERROR_CODE
.EMPTY_META_CELL
,
3765 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3768 RegionInfo hri
= rl
.getRegionLocation(RegionInfo
.DEFAULT_REPLICA_ID
).getRegionInfo();
3769 if (!(isTableIncluded(hri
.getTable())
3770 || hri
.isMetaRegion())) {
3773 PairOfSameType
<RegionInfo
> daughters
= MetaTableAccessor
.getDaughterRegions(result
);
3774 for (HRegionLocation h
: rl
.getRegionLocations()) {
3775 if (h
== null || h
.getRegionInfo() == null) {
3778 sn
= h
.getServerName();
3779 hri
= h
.getRegionInfo();
3782 if (hri
.getReplicaId() == RegionInfo
.DEFAULT_REPLICA_ID
) {
3783 m
= new MetaEntry(hri
, sn
, ts
, daughters
.getFirst(), daughters
.getSecond());
3785 m
= new MetaEntry(hri
, sn
, ts
, null, null);
3787 HbckInfo previous
= regionInfoMap
.get(hri
.getEncodedName());
3788 if (previous
== null) {
3789 regionInfoMap
.put(hri
.getEncodedName(), new HbckInfo(m
));
3790 } else if (previous
.metaEntry
== null) {
3791 previous
.metaEntry
= m
;
3793 throw new IOException("Two entries in hbase:meta are same " + previous
);
3796 PairOfSameType
<RegionInfo
> mergeRegions
= MetaTableAccessor
.getMergeRegions(result
);
3797 for (RegionInfo mergeRegion
: new RegionInfo
[] {
3798 mergeRegions
.getFirst(), mergeRegions
.getSecond() }) {
3799 if (mergeRegion
!= null) {
3800 // This region is already been merged
3801 HbckInfo hbInfo
= getOrCreateInfo(mergeRegion
.getEncodedName());
3802 hbInfo
.setMerged(true);
3806 // show proof of progress to the user, once for every 100 records.
3807 if (countRecord
% 100 == 0) {
3812 } catch (RuntimeException e
) {
3813 LOG
.error("Result=" + result
);
3818 if (!checkMetaOnly
) {
3819 // Scan hbase:meta to pick up user regions
3820 MetaTableAccessor
.fullScanRegions(connection
, visitor
);
3828 * Stores the regioninfo entries scanned from META
3830 static class MetaEntry
extends HRegionInfo
{
3831 ServerName regionServer
; // server hosting this region
3832 long modTime
; // timestamp of most recent modification metadata
3833 RegionInfo splitA
, splitB
; //split daughters
3835 public MetaEntry(RegionInfo rinfo
, ServerName regionServer
, long modTime
) {
3836 this(rinfo
, regionServer
, modTime
, null, null);
3839 public MetaEntry(RegionInfo rinfo
, ServerName regionServer
, long modTime
,
3840 RegionInfo splitA
, RegionInfo splitB
) {
3842 this.regionServer
= regionServer
;
3843 this.modTime
= modTime
;
3844 this.splitA
= splitA
;
3845 this.splitB
= splitB
;
3849 public boolean equals(Object o
) {
3850 boolean superEq
= super.equals(o
);
3855 MetaEntry me
= (MetaEntry
) o
;
3856 if (!regionServer
.equals(me
.regionServer
)) {
3859 return (modTime
== me
.modTime
);
3863 public int hashCode() {
3864 int hash
= Arrays
.hashCode(getRegionName());
3865 hash
= (int) (hash ^
getRegionId());
3866 hash ^
= Arrays
.hashCode(getStartKey());
3867 hash ^
= Arrays
.hashCode(getEndKey());
3868 hash ^
= Boolean
.valueOf(isOffline()).hashCode();
3869 hash ^
= getTable().hashCode();
3870 if (regionServer
!= null) {
3871 hash ^
= regionServer
.hashCode();
3873 hash
= (int) (hash ^ modTime
);
3879 * Stores the regioninfo entries from HDFS
3881 static class HdfsEntry
{
3883 Path hdfsRegionDir
= null;
3884 long hdfsRegionDirModTime
= 0;
3885 boolean hdfsRegioninfoFilePresent
= false;
3886 boolean hdfsOnlyEdits
= false;
3890 * Stores the regioninfo retrieved from Online region servers.
3892 static class OnlineEntry
{
3897 public String
toString() {
3898 return hsa
.toString() + ";" + hri
.getRegionNameAsString();
3903 * Maintain information about a particular region. It gathers information
3904 * from three places -- HDFS, META, and region servers.
3906 public static class HbckInfo
implements KeyRange
{
3907 private MetaEntry metaEntry
= null; // info in META
3908 private HdfsEntry hdfsEntry
= null; // info in HDFS
3909 private List
<OnlineEntry
> deployedEntries
= Lists
.newArrayList(); // on Region Server
3910 private List
<ServerName
> deployedOn
= Lists
.newArrayList(); // info on RS's
3911 private boolean skipChecks
= false; // whether to skip further checks to this region info.
3912 private boolean isMerged
= false;// whether this region has already been merged into another one
3913 private int deployedReplicaId
= RegionInfo
.DEFAULT_REPLICA_ID
;
3914 private RegionInfo primaryHRIForDeployedReplica
= null;
3916 HbckInfo(MetaEntry metaEntry
) {
3917 this.metaEntry
= metaEntry
;
3920 public synchronized int getReplicaId() {
3921 return metaEntry
!= null? metaEntry
.getReplicaId(): deployedReplicaId
;
3924 public synchronized void addServer(RegionInfo hri
, ServerName server
) {
3925 OnlineEntry rse
= new OnlineEntry() ;
3928 this.deployedEntries
.add(rse
);
3929 this.deployedOn
.add(server
);
3930 // save the replicaId that we see deployed in the cluster
3931 this.deployedReplicaId
= hri
.getReplicaId();
3932 this.primaryHRIForDeployedReplica
=
3933 RegionReplicaUtil
.getRegionInfoForDefaultReplica(hri
);
3937 public synchronized String
toString() {
3938 StringBuilder sb
= new StringBuilder();
3939 sb
.append("{ meta => ");
3940 sb
.append((metaEntry
!= null)? metaEntry
.getRegionNameAsString() : "null");
3941 sb
.append( ", hdfs => " + getHdfsRegionDir());
3942 sb
.append( ", deployed => " + Joiner
.on(", ").join(deployedEntries
));
3943 sb
.append( ", replicaId => " + getReplicaId());
3945 return sb
.toString();
3949 public byte[] getStartKey() {
3950 if (this.metaEntry
!= null) {
3951 return this.metaEntry
.getStartKey();
3952 } else if (this.hdfsEntry
!= null) {
3953 return this.hdfsEntry
.hri
.getStartKey();
3955 LOG
.error("Entry " + this + " has no meta or hdfs region start key.");
3961 public byte[] getEndKey() {
3962 if (this.metaEntry
!= null) {
3963 return this.metaEntry
.getEndKey();
3964 } else if (this.hdfsEntry
!= null) {
3965 return this.hdfsEntry
.hri
.getEndKey();
3967 LOG
.error("Entry " + this + " has no meta or hdfs region start key.");
3972 public TableName
getTableName() {
3973 if (this.metaEntry
!= null) {
3974 return this.metaEntry
.getTable();
3975 } else if (this.hdfsEntry
!= null) {
3976 // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3977 // so we get the name from the Path
3978 Path tableDir
= this.hdfsEntry
.hdfsRegionDir
.getParent();
3979 return FSUtils
.getTableName(tableDir
);
3981 // return the info from the first online/deployed hri
3982 for (OnlineEntry e
: deployedEntries
) {
3983 return e
.hri
.getTable();
3989 public String
getRegionNameAsString() {
3990 if (metaEntry
!= null) {
3991 return metaEntry
.getRegionNameAsString();
3992 } else if (hdfsEntry
!= null) {
3993 if (hdfsEntry
.hri
!= null) {
3994 return hdfsEntry
.hri
.getRegionNameAsString();
3997 // return the info from the first online/deployed hri
3998 for (OnlineEntry e
: deployedEntries
) {
3999 return e
.hri
.getRegionNameAsString();
4005 public byte[] getRegionName() {
4006 if (metaEntry
!= null) {
4007 return metaEntry
.getRegionName();
4008 } else if (hdfsEntry
!= null) {
4009 return hdfsEntry
.hri
.getRegionName();
4011 // return the info from the first online/deployed hri
4012 for (OnlineEntry e
: deployedEntries
) {
4013 return e
.hri
.getRegionName();
4019 public RegionInfo
getPrimaryHRIForDeployedReplica() {
4020 return primaryHRIForDeployedReplica
;
4023 Path
getHdfsRegionDir() {
4024 if (hdfsEntry
== null) {
4027 return hdfsEntry
.hdfsRegionDir
;
4030 boolean containsOnlyHdfsEdits() {
4031 if (hdfsEntry
== null) {
4034 return hdfsEntry
.hdfsOnlyEdits
;
4037 boolean isHdfsRegioninfoPresent() {
4038 if (hdfsEntry
== null) {
4041 return hdfsEntry
.hdfsRegioninfoFilePresent
;
4045 if (hdfsEntry
== null) {
4048 return hdfsEntry
.hdfsRegionDirModTime
;
4051 RegionInfo
getHdfsHRI() {
4052 if (hdfsEntry
== null) {
4055 return hdfsEntry
.hri
;
4058 public void setSkipChecks(boolean skipChecks
) {
4059 this.skipChecks
= skipChecks
;
4062 public boolean isSkipChecks() {
4066 public void setMerged(boolean isMerged
) {
4067 this.isMerged
= isMerged
;
4070 public boolean isMerged() {
4071 return this.isMerged
;
4075 final static Comparator
<HbckInfo
> cmp
= new Comparator
<HbckInfo
>() {
4077 public int compare(HbckInfo l
, HbckInfo r
) {
4083 int tableCompare
= l
.getTableName().compareTo(r
.getTableName());
4084 if (tableCompare
!= 0) {
4085 return tableCompare
;
4088 int startComparison
= RegionSplitCalculator
.BYTES_COMPARATOR
.compare(
4089 l
.getStartKey(), r
.getStartKey());
4090 if (startComparison
!= 0) {
4091 return startComparison
;
4094 // Special case for absolute endkey
4095 byte[] endKey
= r
.getEndKey();
4096 endKey
= (endKey
.length
== 0) ?
null : endKey
;
4097 byte[] endKey2
= l
.getEndKey();
4098 endKey2
= (endKey2
.length
== 0) ?
null : endKey2
;
4099 int endComparison
= RegionSplitCalculator
.BYTES_COMPARATOR
.compare(
4102 if (endComparison
!= 0) {
4103 return endComparison
;
4106 // use regionId as tiebreaker.
4107 // Null is considered after all possible values so make it bigger.
4108 if (l
.hdfsEntry
== null && r
.hdfsEntry
== null) {
4111 if (l
.hdfsEntry
== null && r
.hdfsEntry
!= null) {
4114 // l.hdfsEntry must not be null
4115 if (r
.hdfsEntry
== null) {
4118 // both l.hdfsEntry and r.hdfsEntry must not be null.
4119 return Long
.compare(l
.hdfsEntry
.hri
.getRegionId(), r
.hdfsEntry
.hri
.getRegionId());
4124 * Prints summary of all tables found on the system.
4126 private void printTableSummary(SortedMap
<TableName
, TableInfo
> tablesInfo
) {
4127 StringBuilder sb
= new StringBuilder();
4128 int numOfSkippedRegions
;
4129 errors
.print("Summary:");
4130 for (TableInfo tInfo
: tablesInfo
.values()) {
4131 numOfSkippedRegions
= (skippedRegions
.containsKey(tInfo
.getName())) ?
4132 skippedRegions
.get(tInfo
.getName()).size() : 0;
4134 if (errors
.tableHasErrors(tInfo
)) {
4135 errors
.print("Table " + tInfo
.getName() + " is inconsistent.");
4136 } else if (numOfSkippedRegions
> 0){
4137 errors
.print("Table " + tInfo
.getName() + " is okay (with "
4138 + numOfSkippedRegions
+ " skipped regions).");
4141 errors
.print("Table " + tInfo
.getName() + " is okay.");
4143 errors
.print(" Number of regions: " + tInfo
.getNumRegions());
4144 if (numOfSkippedRegions
> 0) {
4145 Set
<String
> skippedRegionStrings
= skippedRegions
.get(tInfo
.getName());
4146 System
.out
.println(" Number of skipped regions: " + numOfSkippedRegions
);
4147 System
.out
.println(" List of skipped regions:");
4148 for(String sr
: skippedRegionStrings
) {
4149 System
.out
.println(" " + sr
);
4152 sb
.setLength(0); // clear out existing buffer, if any.
4153 sb
.append(" Deployed on: ");
4154 for (ServerName server
: tInfo
.deployedOn
) {
4155 sb
.append(" " + server
.toString());
4157 errors
.print(sb
.toString());
  static ErrorReporter getErrorReporter(
      final Configuration conf) throws ClassNotFoundException {
    Class<? extends ErrorReporter> reporter =
        conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
    return ReflectionUtils.newInstance(reporter, conf);
  }
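  // The reporter is pluggable via configuration; a sketch (MyQuietReporter is hypothetical):
  //   conf.setClass("hbasefsck.errorreporter", MyQuietReporter.class, ErrorReporter.class);
  // getErrorReporter(conf) then instantiates that class reflectively instead of the default
  // PrintingErrorReporter.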
4167 public interface ErrorReporter
{
4169 UNKNOWN
, NO_META_REGION
, NULL_META_REGION
, NO_VERSION_FILE
, NOT_IN_META_HDFS
, NOT_IN_META
,
4170 NOT_IN_META_OR_DEPLOYED
, NOT_IN_HDFS_OR_DEPLOYED
, NOT_IN_HDFS
, SERVER_DOES_NOT_MATCH_META
,
4171 NOT_DEPLOYED
, MULTI_DEPLOYED
, SHOULD_NOT_BE_DEPLOYED
, MULTI_META_REGION
, RS_CONNECT_FAILURE
,
4172 FIRST_REGION_STARTKEY_NOT_EMPTY
, LAST_REGION_ENDKEY_NOT_EMPTY
, DUPE_STARTKEYS
,
4173 HOLE_IN_REGION_CHAIN
, OVERLAP_IN_REGION_CHAIN
, REGION_CYCLE
, DEGENERATE_REGION
,
4174 ORPHAN_HDFS_REGION
, LINGERING_SPLIT_PARENT
, NO_TABLEINFO_FILE
, LINGERING_REFERENCE_HFILE
,
4175 LINGERING_HFILELINK
, WRONG_USAGE
, EMPTY_META_CELL
, EXPIRED_TABLE_LOCK
, BOUNDARIES_ERROR
,
4176 ORPHAN_TABLE_STATE
, NO_TABLE_STATE
, UNDELETED_REPLICATION_QUEUE
, DUPE_ENDKEYS
,
4177 UNSUPPORTED_OPTION
, INVALID_TABLE
4180 void report(String message
);
4181 void reportError(String message
);
4182 void reportError(ERROR_CODE errorCode
, String message
);
4183 void reportError(ERROR_CODE errorCode
, String message
, TableInfo table
);
4184 void reportError(ERROR_CODE errorCode
, String message
, TableInfo table
, HbckInfo info
);
4186 ERROR_CODE errorCode
,
4193 void detail(String details
);
4194 ArrayList
<ERROR_CODE
> getErrorList();
4196 void print(String message
);
4198 boolean tableHasErrors(TableInfo table
);
4201 static class PrintingErrorReporter
implements ErrorReporter
{
4202 public int errorCount
= 0;
4203 private int showProgress
;
4204 // How frequently calls to progress() will create output
4205 private static final int progressThreshold
= 100;
4207 Set
<TableInfo
> errorTables
= new HashSet
<>();
4209 // for use by unit tests to verify which errors were discovered
4210 private ArrayList
<ERROR_CODE
> errorList
= new ArrayList
<>();
4213 public void clear() {
4214 errorTables
.clear();
4220 public synchronized void reportError(ERROR_CODE errorCode
, String message
) {
4221 if (errorCode
== ERROR_CODE
.WRONG_USAGE
) {
4222 System
.err
.println(message
);
4226 errorList
.add(errorCode
);
4228 System
.out
.println("ERROR: " + message
);
4235 public synchronized void reportError(ERROR_CODE errorCode
, String message
, TableInfo table
) {
4236 errorTables
.add(table
);
4237 reportError(errorCode
, message
);
4241 public synchronized void reportError(ERROR_CODE errorCode
, String message
, TableInfo table
,
4243 errorTables
.add(table
);
4244 String reference
= "(region " + info
.getRegionNameAsString() + ")";
4245 reportError(errorCode
, reference
+ " " + message
);
4249 public synchronized void reportError(ERROR_CODE errorCode
, String message
, TableInfo table
,
4250 HbckInfo info1
, HbckInfo info2
) {
4251 errorTables
.add(table
);
4252 String reference
= "(regions " + info1
.getRegionNameAsString()
4253 + " and " + info2
.getRegionNameAsString() + ")";
4254 reportError(errorCode
, reference
+ " " + message
);
4258 public synchronized void reportError(String message
) {
4259 reportError(ERROR_CODE
.UNKNOWN
, message
);
4263 * Report error information, but do not increment the error count. Intended for cases
4264 * where the actual error would have been reported previously.
4268 public synchronized void report(String message
) {
4270 System
.out
.println("ERROR: " + message
);
4276 public synchronized int summarize() {
4277 System
.out
.println(Integer
.toString(errorCount
) +
4278 " inconsistencies detected.");
4279 if (errorCount
== 0) {
4280 System
.out
.println("Status: OK");
4283 System
.out
.println("Status: INCONSISTENT");
4289 public ArrayList
<ERROR_CODE
> getErrorList() {
4294 public synchronized void print(String message
) {
4296 System
.out
.println(message
);
4301 public boolean tableHasErrors(TableInfo table
) {
4302 return errorTables
.contains(table
);
4306 public void resetErrors() {
4311 public synchronized void detail(String message
) {
4313 System
.out
.println(message
);
4319 public synchronized void progress() {
4320 if (showProgress
++ == progressThreshold
) {
4322 System
.out
.print(".");
4330 * Contact a region server and get all information from it
4332 static class WorkItemRegion
implements Callable
<Void
> {
4333 private final HBaseFsck hbck
;
4334 private final ServerName rsinfo
;
4335 private final ErrorReporter errors
;
4336 private final ClusterConnection connection
;
4338 WorkItemRegion(HBaseFsck hbck
, ServerName info
,
4339 ErrorReporter errors
, ClusterConnection connection
) {
4342 this.errors
= errors
;
4343 this.connection
= connection
;
4347 public synchronized Void
call() throws IOException
{
4350 BlockingInterface server
= connection
.getAdmin(rsinfo
);
4352 // list all online regions from this region server
4353 List
<RegionInfo
> regions
= ProtobufUtil
.getOnlineRegions(server
);
4354 regions
= filterRegions(regions
);
4357 errors
.detail("RegionServer: " + rsinfo
.getServerName() +
4358 " number of regions: " + regions
.size());
4359 for (RegionInfo rinfo
: regions
) {
4360 errors
.detail(" " + rinfo
.getRegionNameAsString() +
4361 " id: " + rinfo
.getRegionId() +
4362 " encoded_name: " + rinfo
.getEncodedName() +
4363 " start: " + Bytes
.toStringBinary(rinfo
.getStartKey()) +
4364 " end: " + Bytes
.toStringBinary(rinfo
.getEndKey()));
4368 // check to see if the existence of this region matches the region in META
4369 for (RegionInfo r
:regions
) {
4370 HbckInfo hbi
= hbck
.getOrCreateInfo(r
.getEncodedName());
4371 hbi
.addServer(r
, rsinfo
);
4373 } catch (IOException e
) { // unable to connect to the region server.
4374 errors
.reportError(ERROR_CODE
.RS_CONNECT_FAILURE
, "RegionServer: " + rsinfo
.getServerName() +
4375 " Unable to fetch region information. " + e
);
4381 private List
<RegionInfo
> filterRegions(List
<RegionInfo
> regions
) {
4382 List
<RegionInfo
> ret
= Lists
.newArrayList();
4383 for (RegionInfo hri
: regions
) {
4384 if (hri
.isMetaRegion() || (!hbck
.checkMetaOnly
4385 && hbck
.isTableIncluded(hri
.getTable()))) {
4394 * Contact hdfs and get all information about specified table directory into
4397 class WorkItemHdfsDir
implements Callable
<Void
> {
4398 private FileStatus tableDir
;
4399 private ErrorReporter errors
;
4400 private FileSystem fs
;
4402 WorkItemHdfsDir(FileSystem fs
, ErrorReporter errors
,
4403 FileStatus status
) {
4405 this.tableDir
= status
;
4406 this.errors
= errors
;
4410 public synchronized Void
call() throws InterruptedException
, ExecutionException
{
4411 final Vector
<Exception
> exceptions
= new Vector
<>();
4414 final FileStatus
[] regionDirs
= fs
.listStatus(tableDir
.getPath());
4415 final List
<Future
<?
>> futures
= new ArrayList
<>(regionDirs
.length
);
4417 for (final FileStatus regionDir
: regionDirs
) {
4419 final String encodedName
= regionDir
.getPath().getName();
4420 // ignore directories that aren't hexadecimal
4421 if (!encodedName
.toLowerCase(Locale
.ROOT
).matches("[0-9a-f]+")) {
4425 if (!exceptions
.isEmpty()) {
4429 futures
.add(executor
.submit(new Runnable() {
4433 LOG
.debug("Loading region info from hdfs:"+ regionDir
.getPath());
4435 Path regioninfoFile
= new Path(regionDir
.getPath(), HRegionFileSystem
.REGION_INFO_FILE
);
4436 boolean regioninfoFileExists
= fs
.exists(regioninfoFile
);
4438 if (!regioninfoFileExists
) {
4439 // As tables become larger it is more and more likely that by the time you
4440 // reach a given region that it will be gone due to region splits/merges.
4441 if (!fs
.exists(regionDir
.getPath())) {
4442 LOG
.warn("By the time we tried to process this region dir it was already gone: "
4443 + regionDir
.getPath());
4448 HbckInfo hbi
= HBaseFsck
.this.getOrCreateInfo(encodedName
);
4449 HdfsEntry he
= new HdfsEntry();
4450 synchronized (hbi
) {
4451 if (hbi
.getHdfsRegionDir() != null) {
4452 errors
.print("Directory " + encodedName
+ " duplicate??" +
4453 hbi
.getHdfsRegionDir());
4456 he
.hdfsRegionDir
= regionDir
.getPath();
4457 he
.hdfsRegionDirModTime
= regionDir
.getModificationTime();
4458 he
.hdfsRegioninfoFilePresent
= regioninfoFileExists
;
4459 // we add to orphan list when we attempt to read .regioninfo
4461 // Set a flag if this region contains only edits
4462 // This is special case if a region is left after split
4463 he
.hdfsOnlyEdits
= true;
4464 FileStatus
[] subDirs
= fs
.listStatus(regionDir
.getPath());
4465 Path ePath
= WALSplitter
.getRegionDirRecoveredEditsDir(regionDir
.getPath());
4466 for (FileStatus subDir
: subDirs
) {
4468 String sdName
= subDir
.getPath().getName();
4469 if (!sdName
.startsWith(".") && !sdName
.equals(ePath
.getName())) {
4470 he
.hdfsOnlyEdits
= false;
4476 } catch (Exception e
) {
4477 LOG
.error("Could not load region dir", e
);
4484 // Ensure all pending tasks are complete (or that we run into an exception)
4485 for (Future
<?
> f
: futures
) {
4486 if (!exceptions
.isEmpty()) {
4491 } catch (ExecutionException e
) {
4492 LOG
.error("Unexpected exec exception! Should've been caught already. (Bug?)", e
);
4493 // Shouldn't happen, we already logged/caught any exceptions in the Runnable
4496 } catch (IOException e
) {
4497 LOG
.error("Cannot execute WorkItemHdfsDir for " + tableDir
, e
);
4500 if (!exceptions
.isEmpty()) {
4501 errors
.reportError(ERROR_CODE
.RS_CONNECT_FAILURE
, "Table Directory: "
4502 + tableDir
.getPath().getName()
4503 + " Unable to fetch all HDFS region information. ");
4504 // Just throw the first exception as an indication something bad happened
4505 // Don't need to propagate all the exceptions, we already logged them all anyway
4506 throw new ExecutionException("First exception in WorkItemHdfsDir", exceptions
.firstElement());
4514 * Contact hdfs and get all information about specified table directory into
4517 static class WorkItemHdfsRegionInfo
implements Callable
<Void
> {
4518 private HbckInfo hbi
;
4519 private HBaseFsck hbck
;
4520 private ErrorReporter errors
;
4522 WorkItemHdfsRegionInfo(HbckInfo hbi
, HBaseFsck hbck
, ErrorReporter errors
) {
4525 this.errors
= errors
;
4529 public synchronized Void
call() throws IOException
{
4530 // only load entries that haven't been loaded yet.
4531 if (hbi
.getHdfsHRI() == null) {
4534 hbck
.loadHdfsRegioninfo(hbi
);
4535 } catch (IOException ioe
) {
4536 String msg
= "Orphan region in HDFS: Unable to load .regioninfo from table "
4537 + hbi
.getTableName() + " in hdfs dir "
4538 + hbi
.getHdfsRegionDir()
4539 + "! It may be an invalid format or version file. Treating as "
4540 + "an orphaned regiondir.";
4541 errors
.reportError(ERROR_CODE
.ORPHAN_HDFS_REGION
, msg
);
4543 hbck
.debugLsr(hbi
.getHdfsRegionDir());
4544 } catch (IOException ioe2
) {
4545 LOG
.error("Unable to read directory " + hbi
.getHdfsRegionDir(), ioe2
);
4548 hbck
.orphanHdfsDirs
.add(hbi
);
4557 * Display the full report from fsck. This displays all live and dead region
4558 * servers, and all known regions.
4560 public static void setDisplayFullReport() {
4565 * Set exclusive mode.
4567 public static void setForceExclusive() {
4568 forceExclusive
= true;
4572 * Only one instance of hbck can modify HBase at a time.
4574 public boolean isExclusive() {
4575 return fixAny
|| forceExclusive
;
4580 * Print only summary of the tables and status (OK or INCONSISTENT)
4582 static void setSummary() {
4587 * Set hbase:meta check mode.
4588 * Print only info about hbase:meta table deployment/state
4590 void setCheckMetaOnly() {
4591 checkMetaOnly
= true;
4595 * Set region boundaries check mode.
4597 void setRegionBoundariesCheck() {
4598 checkRegionBoundaries
= true;
4602 * Set replication fix mode.
4604 public void setFixReplication(boolean shouldFix
) {
4605 fixReplication
= shouldFix
;
4606 fixAny
|= shouldFix
;
4609 public void setCleanReplicationBarrier(boolean shouldClean
) {
4610 cleanReplicationBarrier
= shouldClean
;
4614 * Check if we should rerun fsck again. This checks if we've tried to
4615 * fix something and we should rerun fsck tool again.
4616 * Display the full report from fsck. This displays all live and dead
4617 * region servers, and all known regions.
4619 void setShouldRerun() {
4623 public boolean shouldRerun() {
4628 * Fix inconsistencies found by fsck. This should try to fix errors (if any)
4629 * found by fsck utility.
4631 public void setFixAssignments(boolean shouldFix
) {
4632 fixAssignments
= shouldFix
;
4633 fixAny
|= shouldFix
;
4636 boolean shouldFixAssignments() {
4637 return fixAssignments
;
4640 public void setFixMeta(boolean shouldFix
) {
4641 fixMeta
= shouldFix
;
4642 fixAny
|= shouldFix
;
4645 boolean shouldFixMeta() {
4649 public void setFixEmptyMetaCells(boolean shouldFix
) {
4650 fixEmptyMetaCells
= shouldFix
;
4651 fixAny
|= shouldFix
;
4654 boolean shouldFixEmptyMetaCells() {
4655 return fixEmptyMetaCells
;
4658 public void setCheckHdfs(boolean checking
) {
4659 checkHdfs
= checking
;
4662 boolean shouldCheckHdfs() {
4666 public void setFixHdfsHoles(boolean shouldFix
) {
4667 fixHdfsHoles
= shouldFix
;
4668 fixAny
|= shouldFix
;
4671 boolean shouldFixHdfsHoles() {
4672 return fixHdfsHoles
;
4675 public void setFixTableOrphans(boolean shouldFix
) {
4676 fixTableOrphans
= shouldFix
;
4677 fixAny
|= shouldFix
;
4680 boolean shouldFixTableOrphans() {
4681 return fixTableOrphans
;
4684 public void setFixHdfsOverlaps(boolean shouldFix
) {
4685 fixHdfsOverlaps
= shouldFix
;
4686 fixAny
|= shouldFix
;
4689 boolean shouldFixHdfsOverlaps() {
4690 return fixHdfsOverlaps
;
4693 public void setFixHdfsOrphans(boolean shouldFix
) {
4694 fixHdfsOrphans
= shouldFix
;
4695 fixAny
|= shouldFix
;
4698 boolean shouldFixHdfsOrphans() {
4699 return fixHdfsOrphans
;
4702 public void setFixVersionFile(boolean shouldFix
) {
4703 fixVersionFile
= shouldFix
;
4704 fixAny
|= shouldFix
;
4707 public boolean shouldFixVersionFile() {
4708 return fixVersionFile
;
4711 public void setSidelineBigOverlaps(boolean sbo
) {
4712 this.sidelineBigOverlaps
= sbo
;
4715 public boolean shouldSidelineBigOverlaps() {
4716 return sidelineBigOverlaps
;
4719 public void setFixSplitParents(boolean shouldFix
) {
4720 fixSplitParents
= shouldFix
;
4721 fixAny
|= shouldFix
;
4724 public void setRemoveParents(boolean shouldFix
) {
4725 removeParents
= shouldFix
;
4726 fixAny
|= shouldFix
;
4729 boolean shouldFixSplitParents() {
4730 return fixSplitParents
;
4733 boolean shouldRemoveParents() {
4734 return removeParents
;
4737 public void setFixReferenceFiles(boolean shouldFix
) {
4738 fixReferenceFiles
= shouldFix
;
4739 fixAny
|= shouldFix
;
4742 boolean shouldFixReferenceFiles() {
4743 return fixReferenceFiles
;
4746 public void setFixHFileLinks(boolean shouldFix
) {
4747 fixHFileLinks
= shouldFix
;
4748 fixAny
|= shouldFix
;
4751 boolean shouldFixHFileLinks() {
4752 return fixHFileLinks
;
4755 public boolean shouldIgnorePreCheckPermission() {
4756 return !fixAny
|| ignorePreCheckPermission
;
4759 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission
) {
4760 this.ignorePreCheckPermission
= ignorePreCheckPermission
;
4764 * @param mm maximum number of regions to merge into a single region.
4766 public void setMaxMerge(int mm
) {
4770 public int getMaxMerge() {
4774 public void setMaxOverlapsToSideline(int mo
) {
4775 this.maxOverlapsToSideline
= mo
;
4778 public int getMaxOverlapsToSideline() {
4779 return maxOverlapsToSideline
;
4783 * Only check/fix tables specified by the list,
4784 * Empty list means all tables are included.
4786 boolean isTableIncluded(TableName table
) {
4787 return (tablesIncluded
.isEmpty()) || tablesIncluded
.contains(table
);
4790 public void includeTable(TableName table
) {
4791 tablesIncluded
.add(table
);
4794 Set
<TableName
> getIncludedTables() {
4795 return new HashSet
<>(tablesIncluded
);
4799 * We are interested in only those tables that have not changed their state in
4800 * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag
4801 * @param seconds - the time in seconds
4803 public void setTimeLag(long seconds
) {
4804 timelag
= seconds
* 1000; // convert to milliseconds
4809 * @param sidelineDir - HDFS path to sideline data
4811 public void setSidelineDir(String sidelineDir
) {
4812 this.sidelineDir
= new Path(sidelineDir
);
4815 protected HFileCorruptionChecker
createHFileCorruptionChecker(boolean sidelineCorruptHFiles
) throws IOException
{
4816 return new HFileCorruptionChecker(getConf(), executor
, sidelineCorruptHFiles
);
4819 public HFileCorruptionChecker
getHFilecorruptionChecker() {
4823 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc
) {
4827 public void setRetCode(int code
) {
4828 this.retcode
= code
;
4831 public int getRetCode() {
4835 protected HBaseFsck
printUsageAndExit() {
4836 StringWriter sw
= new StringWriter(2048);
4837 PrintWriter out
= new PrintWriter(sw
);
4839 out
.println("-----------------------------------------------------------------------");
4840 out
.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
4841 out
.println("In general, all Read-Only options are supported and can be be used");
4842 out
.println("safely. Most -fix/ -repair options are NOT supported. Please see usage");
4843 out
.println("below for details on which options are not supported.");
4844 out
.println("-----------------------------------------------------------------------");
4846 out
.println("Usage: fsck [opts] {only tables}");
4847 out
.println(" where [opts] are:");
4848 out
.println(" -help Display help options (this)");
4849 out
.println(" -details Display full report of all regions.");
4850 out
.println(" -timelag <timeInSeconds> Process only regions that " +
4851 " have not experienced any metadata updates in the last " +
4852 " <timeInSeconds> seconds.");
4853 out
.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4854 " before checking if the fix worked if run with -fix");
4855 out
.println(" -summary Print only summary of the tables and status.");
4856 out
.println(" -metaonly Only check the state of the hbase:meta table.");
4857 out
.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4858 out
.println(" -boundaries Verify that regions boundaries are the same between META and store files.");
4859 out
.println(" -exclusive Abort if another hbck is exclusive or fixing.");
4862 out
.println(" Datafile Repair options: (expert features, use with caution!)");
4863 out
.println(" -checkCorruptHFiles Check all Hfiles by opening them to make sure they are valid");
4864 out
.println(" -sidelineCorruptHFiles Quarantine corrupted HFiles. implies -checkCorruptHFiles");
4867 out
.println(" Replication options");
4868 out
.println(" -fixReplication Deletes replication queues for removed peers");
4871 out
.println(" Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
4872 out
.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
4873 out
.println(" -fixReferenceFiles Try to offline lingering reference store files");
4874 out
.println(" -fixHFileLinks Try to offline lingering HFileLinks");
4875 out
.println(" -noHdfsChecking Don't load/check region info from HDFS."
4876 + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4877 out
.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
4880 out
.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
4882 out
.println(" UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
4883 out
.println(" -fix Try to fix region assignments. This is for backwards compatiblity");
4884 out
.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
4885 out
.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
4886 out
.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
4887 out
.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
4888 out
.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4889 out
.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
4890 out
.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE
+" by default)");
4891 out
.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
4892 out
.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE
+" by default)");
4893 out
.println(" -fixSplitParents Try to force offline split parents to be online.");
4894 out
.println(" -removeParents Try to offline and sideline lingering parents and keep daughter regions.");
4895 out
.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
4896 + " (empty REGIONINFO_QUALIFIER rows)");
4899 out
.println(" UNSUPPORTED Metadata Repair shortcuts");
4900 out
.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4901 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles" +
4903 out
.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4905 out
.println(" Replication options");
4906 out
.println(" -fixReplication Deletes replication queues for removed peers");
4907 out
.println(" -cleanReplicationBrarier [tableName] clean the replication barriers " +
4908 "of a specified table, tableName is required");
4910 errors
.reportError(ERROR_CODE
.WRONG_USAGE
, sw
.toString());
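  // Illustrative invocations only (not taken from this file): the options documented above are
  // typically supplied through the standard `hbase` launcher script, for example:
  //
  //   hbase hbck                          report-only run over all tables
  //   hbase hbck -details TableFoo        verbose report limited to table "TableFoo"
  //   hbase hbck -metaonly                only check the state of hbase:meta
  //   hbase hbck -fixReplication          delete replication queues for removed peers
  //
  // "TableFoo" is a placeholder table name used purely for illustration.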
  /**
   * Main program
   */
  public static void main(String[] args) throws Exception {
    // create a fsck object
    Configuration conf = HBaseConfiguration.create();
    Path hbasedir = FSUtils.getRootDir(conf);
    URI defaultFs = hbasedir.getFileSystem(conf).getUri();
    FSUtils.setFsDefault(conf, new Path(defaultFs));
    int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
    System.exit(ret);
  }
  /**
   * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
   */
  static class HBaseFsckTool extends Configured implements Tool {
    HBaseFsckTool(Configuration conf) { super(conf); }

    @Override
    public int run(String[] args) throws Exception {
      HBaseFsck hbck = new HBaseFsck(getConf());
      hbck.exec(hbck.executor, args);
      hbck.close();
      return hbck.getRetCode();
    }
  }
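  // Because HBaseFsckTool extends Configured and is driven through ToolRunner (see main above),
  // generic Hadoop -Dkey=value arguments are absorbed into the Configuration before run() is
  // invoked. A sketch of what that looks like on the command line, using a real Hadoop key only
  // as an illustration:
  //
  //   hbase org.apache.hadoop.hbase.util.HBaseFsck -Dfs.defaultFS=hdfs://cluster-ns -details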
  public HBaseFsck exec(ExecutorService exec, String[] args)
      throws KeeperException, IOException, InterruptedException, ReplicationException {
    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;

    boolean checkCorruptHFiles = false;
    boolean sidelineCorruptHFiles = false;

    // Process command-line args.
    for (int i = 0; i < args.length; i++) {
      String cmd = args[i];
      if (cmd.equals("-help") || cmd.equals("-h")) {
        return printUsageAndExit();
      } else if (cmd.equals("-details")) {
        setDisplayFullReport();
      } else if (cmd.equals("-exclusive")) {
        setForceExclusive();
      } else if (cmd.equals("-timelag")) {
        if (i == args.length - 1) {
          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
          return printUsageAndExit();
        }
        try {
          long timelag = Long.parseLong(args[++i]);
          setTimeLag(timelag);
        } catch (NumberFormatException e) {
          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
          return printUsageAndExit();
        }
      } else if (cmd.equals("-sleepBeforeRerun")) {
        if (i == args.length - 1) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
              "HBaseFsck: -sleepBeforeRerun needs a value.");
          return printUsageAndExit();
        }
        try {
          sleepBeforeRerun = Long.parseLong(args[++i]);
        } catch (NumberFormatException e) {
          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
          return printUsageAndExit();
        }
      } else if (cmd.equals("-sidelineDir")) {
        if (i == args.length - 1) {
          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
          return printUsageAndExit();
        }
        setSidelineDir(args[++i]);
      } else if (cmd.equals("-fix")) {
        errors.reportError(ERROR_CODE.WRONG_USAGE,
            "This option is deprecated, please use -fixAssignments instead.");
        setFixAssignments(true);
      } else if (cmd.equals("-fixAssignments")) {
        setFixAssignments(true);
      } else if (cmd.equals("-fixMeta")) {
        setFixMeta(true);
      } else if (cmd.equals("-noHdfsChecking")) {
        setCheckHdfs(false);
      } else if (cmd.equals("-fixHdfsHoles")) {
        setFixHdfsHoles(true);
      } else if (cmd.equals("-fixHdfsOrphans")) {
        setFixHdfsOrphans(true);
      } else if (cmd.equals("-fixTableOrphans")) {
        setFixTableOrphans(true);
      } else if (cmd.equals("-fixHdfsOverlaps")) {
        setFixHdfsOverlaps(true);
      } else if (cmd.equals("-fixVersionFile")) {
        setFixVersionFile(true);
      } else if (cmd.equals("-sidelineBigOverlaps")) {
        setSidelineBigOverlaps(true);
      } else if (cmd.equals("-fixSplitParents")) {
        setFixSplitParents(true);
      } else if (cmd.equals("-removeParents")) {
        setRemoveParents(true);
      } else if (cmd.equals("-ignorePreCheckPermission")) {
        setIgnorePreCheckPermission(true);
      } else if (cmd.equals("-checkCorruptHFiles")) {
        checkCorruptHFiles = true;
      } else if (cmd.equals("-sidelineCorruptHFiles")) {
        sidelineCorruptHFiles = true;
      } else if (cmd.equals("-fixReferenceFiles")) {
        setFixReferenceFiles(true);
      } else if (cmd.equals("-fixHFileLinks")) {
        setFixHFileLinks(true);
      } else if (cmd.equals("-fixEmptyMetaCells")) {
        setFixEmptyMetaCells(true);
      } else if (cmd.equals("-repair")) {
        // this attempts to merge overlapping hdfs regions, needs testing
        // under load
        setFixHdfsHoles(true);
        setFixHdfsOrphans(true);
        setFixMeta(true);
        setFixAssignments(true);
        setFixHdfsOverlaps(true);
        setFixVersionFile(true);
        setSidelineBigOverlaps(true);
        setFixSplitParents(false);
        setCheckHdfs(true);
        setFixReferenceFiles(true);
        setFixHFileLinks(true);
      } else if (cmd.equals("-repairHoles")) {
        // this will make all missing hdfs regions available but may lose data
        setFixHdfsHoles(true);
        setFixHdfsOrphans(false);
        setFixMeta(true);
        setFixAssignments(true);
        setFixHdfsOverlaps(false);
        setSidelineBigOverlaps(false);
        setFixSplitParents(false);
        setCheckHdfs(true);
      } else if (cmd.equals("-maxOverlapsToSideline")) {
        if (i == args.length - 1) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
              "-maxOverlapsToSideline needs a numeric value argument.");
          return printUsageAndExit();
        }
        try {
          int maxOverlapsToSideline = Integer.parseInt(args[++i]);
          setMaxOverlapsToSideline(maxOverlapsToSideline);
        } catch (NumberFormatException e) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
              "-maxOverlapsToSideline needs a numeric value argument.");
          return printUsageAndExit();
        }
      } else if (cmd.equals("-maxMerge")) {
        if (i == args.length - 1) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
              "-maxMerge needs a numeric value argument.");
          return printUsageAndExit();
        }
        try {
          int maxMerge = Integer.parseInt(args[++i]);
          setMaxMerge(maxMerge);
        } catch (NumberFormatException e) {
          errors.reportError(ERROR_CODE.WRONG_USAGE,
              "-maxMerge needs a numeric value argument.");
          return printUsageAndExit();
        }
      } else if (cmd.equals("-summary")) {
        setSummary();
      } else if (cmd.equals("-metaonly")) {
        setCheckMetaOnly();
      } else if (cmd.equals("-boundaries")) {
        setRegionBoundariesCheck();
      } else if (cmd.equals("-fixReplication")) {
        setFixReplication(true);
      } else if (cmd.equals("-cleanReplicationBarrier")) {
        setCleanReplicationBarrier(true);
        if (args[++i].startsWith("-")) {
          printUsageAndExit();
        }
        setCleanReplicationBarrierTable(args[i]);
      } else if (cmd.startsWith("-")) {
        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
        return printUsageAndExit();
      } else {
        includeTable(TableName.valueOf(cmd));
        errors.print("Allow checking/fixes for table: " + cmd);
      }
    }
    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));

    // pre-check current user has FS write permission or not
    try {
      preCheckPermission();
    } catch (AccessDeniedException ace) {
      Runtime.getRuntime().exit(-1);
    } catch (IOException ioe) {
      Runtime.getRuntime().exit(-1);
    }

    // do the real work of hbck
    connect();

    try {
      // after connecting to server above, we have server version
      // check if unsupported option is specified based on server version
      if (!isOptionsSupported(args)) {
        return printUsageAndExit();
      }

      // if corrupt file mode is on, first fix them since they may be opened later
      if (checkCorruptHFiles || sidelineCorruptHFiles) {
        LOG.info("Checking all hfiles for corruption");
        HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
        setHFileCorruptionChecker(hfcc); // so we can get result
        Collection<TableName> tables = getIncludedTables();
        Collection<Path> tableDirs = new ArrayList<>();
        Path rootdir = FSUtils.getRootDir(getConf());
        if (tables.size() > 0) {
          for (TableName t : tables) {
            tableDirs.add(FSUtils.getTableDir(rootdir, t));
          }
        } else {
          tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
        }
        hfcc.checkTables(tableDirs);
        hfcc.report(errors);
      }

      // check and fix table integrity, region consistency.
      int code = onlineHbck();
      setRetCode(code);
      // If we have changed the HBase state it is better to run hbck again
      // to see if we haven't broken something else in the process.
      // We run it only once more because otherwise we can easily fall into
      // an infinite loop.
      if (shouldRerun()) {
        try {
          LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
          Thread.sleep(sleepBeforeRerun);
        } catch (InterruptedException ie) {
          LOG.warn("Interrupted while sleeping");
          return this;
        }
        // On the re-run, only report; do not apply the fixes again.
        setFixAssignments(false);
        setFixMeta(false);
        setFixHdfsHoles(false);
        setFixHdfsOverlaps(false);
        setFixVersionFile(false);
        setFixTableOrphans(false);
        errors.resetErrors();
        code = onlineHbck();
        setRetCode(code);
      }
    } finally {
      IOUtils.closeQuietly(this);
    }
    return this;
  }
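  // A minimal sketch (illustrative only) of driving this method programmatically rather than
  // through HBaseFsckTool; the executor field is the same one the tool wrapper passes in:
  //
  //   Configuration conf = HBaseConfiguration.create();      // standard client configuration
  //   HBaseFsck fsck = new HBaseFsck(conf);
  //   fsck.exec(fsck.executor, new String[] { "-details" }); // "-details" is just an example option
  //   int rc = fsck.getRetCode();                             // the tool's exit code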
  private boolean isOptionsSupported(String[] args) {
    boolean result = true;
    String hbaseServerVersion = status.getHBaseVersion();
    if (VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0) {
      // Process command-line args.
      for (String arg : args) {
        if (unsupportedOptionsInV2.contains(arg)) {
          errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
              "option '" + arg + "' is not supported!");
          result = false;
          break;
        }
      }
    }
    return result;
  }
  public void setCleanReplicationBarrierTable(String cleanReplicationBarrierTable) {
    this.cleanReplicationBarrierTable = TableName.valueOf(cleanReplicationBarrierTable);
  }
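  // TableName.valueOf accepts both plain and namespace-qualified names, so (as an illustration,
  // the names are placeholders) setCleanReplicationBarrierTable("t1") and
  // setCleanReplicationBarrierTable("ns1:t1") are both valid inputs here.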
  public void cleanReplicationBarrier() throws IOException {
    if (!cleanReplicationBarrier || cleanReplicationBarrierTable == null) {
      return;
    }
    if (cleanReplicationBarrierTable.isSystemTable()) {
      errors.reportError(ERROR_CODE.INVALID_TABLE,
          "invalid table: " + cleanReplicationBarrierTable);
      return;
    }

    boolean isGlobalScope = false;
    try {
      isGlobalScope = admin.getDescriptor(cleanReplicationBarrierTable).hasGlobalReplicationScope();
    } catch (TableNotFoundException e) {
      LOG.info("we may need to clean some erroneous data due to bugs");
    }

    if (isGlobalScope) {
      errors.reportError(ERROR_CODE.INVALID_TABLE,
          "table's replication scope is global: " + cleanReplicationBarrierTable);
      return;
    }
    List<byte[]> regionNames = new ArrayList<>();
    Scan barrierScan = new Scan();
    barrierScan.setCaching(100);
    barrierScan.addFamily(HConstants.REPLICATION_BARRIER_FAMILY);
    barrierScan
        .withStartRow(MetaTableAccessor.getTableStartRowForMeta(cleanReplicationBarrierTable,
            MetaTableAccessor.QueryType.REGION))
        .withStopRow(MetaTableAccessor.getTableStopRowForMeta(cleanReplicationBarrierTable,
            MetaTableAccessor.QueryType.REGION));
    Result result;
    try (ResultScanner scanner = meta.getScanner(barrierScan)) {
      while ((result = scanner.next()) != null) {
        regionNames.add(result.getRow());
      }
    }
    if (regionNames.size() <= 0) {
      errors.reportError(ERROR_CODE.INVALID_TABLE,
          "there is no barriers of this table: " + cleanReplicationBarrierTable);
      return;
    }

    ReplicationQueueStorage queueStorage =
        ReplicationStorageFactory.getReplicationQueueStorage(zkw, getConf());
    List<ReplicationPeerDescription> peerDescriptions = admin.listReplicationPeers();
    if (peerDescriptions != null && peerDescriptions.size() > 0) {
      List<String> peers = peerDescriptions.stream()
          .filter(peerConfig -> ReplicationUtils.contains(peerConfig.getPeerConfig(),
              cleanReplicationBarrierTable))
          .map(peerConfig -> peerConfig.getPeerId()).collect(Collectors.toList());
      try {
        List<String> batch = new ArrayList<>();
        for (String peer : peers) {
          for (byte[] regionName : regionNames) {
            batch.add(RegionInfo.encodeRegionName(regionName));
            if (batch.size() % 100 == 0) {
              queueStorage.removeLastSequenceIds(peer, batch);
              batch.clear();
            }
          }
          if (batch.size() > 0) {
            queueStorage.removeLastSequenceIds(peer, batch);
            batch.clear();
          }
        }
      } catch (ReplicationException re) {
        throw new IOException(re);
      }
    }
    for (byte[] regionName : regionNames) {
      meta.delete(new Delete(regionName).addFamily(HConstants.REPLICATION_BARRIER_FAMILY));
    }
  }
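  // The command-line path into this method (see exec() above) is, illustratively:
  //
  //   hbase hbck -cleanReplicationBarrier myTable
  //
  // where "myTable" is a placeholder for a non-system table whose replication scope is not
  // global; otherwise the method reports INVALID_TABLE and returns without deleting anything.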
  /**
   * ls -r for debugging purposes
   */
  void debugLsr(Path p) throws IOException {
    debugLsr(getConf(), p, errors);
  }

  /**
   * ls -r for debugging purposes
   */
  public static void debugLsr(Configuration conf, Path p) throws IOException {
    debugLsr(conf, p, new PrintingErrorReporter());
  }

  /**
   * ls -r for debugging purposes
   */
  public static void debugLsr(Configuration conf, Path p, ErrorReporter errors)
      throws IOException {
    if (!LOG.isDebugEnabled() || p == null) {
      return;
    }
    FileSystem fs = p.getFileSystem(conf);

    if (!fs.exists(p)) {
      return;
    }
    errors.print(p.toString());

    if (fs.isFile(p)) {
      return;
    }

    if (fs.getFileStatus(p).isDirectory()) {
      FileStatus[] fss = fs.listStatus(p);
      for (FileStatus status : fss) {
        debugLsr(conf, status.getPath(), errors);
      }
    }
  }
}