hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.util;
20 import java.io.Closeable;
21 import java.io.FileNotFoundException;
22 import java.io.IOException;
23 import java.io.InterruptedIOException;
24 import java.io.PrintWriter;
25 import java.io.StringWriter;
26 import java.net.InetAddress;
27 import java.net.URI;
28 import java.util.ArrayList;
29 import java.util.Collection;
30 import java.util.Collections;
31 import java.util.Comparator;
32 import java.util.EnumSet;
33 import java.util.HashMap;
34 import java.util.HashSet;
35 import java.util.Iterator;
36 import java.util.List;
37 import java.util.Locale;
38 import java.util.Map;
39 import java.util.Map.Entry;
40 import java.util.Objects;
41 import java.util.Optional;
42 import java.util.Set;
43 import java.util.SortedMap;
44 import java.util.TreeMap;
45 import java.util.Vector;
46 import java.util.concurrent.Callable;
47 import java.util.concurrent.ConcurrentSkipListMap;
48 import java.util.concurrent.ExecutionException;
49 import java.util.concurrent.ExecutorService;
50 import java.util.concurrent.Executors;
51 import java.util.concurrent.Future;
52 import java.util.concurrent.FutureTask;
53 import java.util.concurrent.ScheduledThreadPoolExecutor;
54 import java.util.concurrent.TimeUnit;
55 import java.util.concurrent.TimeoutException;
56 import java.util.concurrent.atomic.AtomicBoolean;
57 import java.util.concurrent.atomic.AtomicInteger;
58 import java.util.stream.Collectors;
59 import org.apache.commons.io.IOUtils;
60 import org.apache.commons.lang3.StringUtils;
61 import org.apache.hadoop.conf.Configuration;
62 import org.apache.hadoop.conf.Configured;
63 import org.apache.hadoop.fs.FSDataOutputStream;
64 import org.apache.hadoop.fs.FileStatus;
65 import org.apache.hadoop.fs.FileSystem;
66 import org.apache.hadoop.fs.Path;
67 import org.apache.hadoop.fs.permission.FsAction;
68 import org.apache.hadoop.fs.permission.FsPermission;
69 import org.apache.hadoop.hbase.Abortable;
70 import org.apache.hadoop.hbase.CatalogFamilyFormat;
71 import org.apache.hadoop.hbase.Cell;
72 import org.apache.hadoop.hbase.CellUtil;
73 import org.apache.hadoop.hbase.ClientMetaTableAccessor;
74 import org.apache.hadoop.hbase.ClusterMetrics;
75 import org.apache.hadoop.hbase.ClusterMetrics.Option;
76 import org.apache.hadoop.hbase.HBaseConfiguration;
77 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
78 import org.apache.hadoop.hbase.HConstants;
79 import org.apache.hadoop.hbase.HRegionLocation;
80 import org.apache.hadoop.hbase.KeyValue;
81 import org.apache.hadoop.hbase.MasterNotRunningException;
82 import org.apache.hadoop.hbase.MetaTableAccessor;
83 import org.apache.hadoop.hbase.RegionLocations;
84 import org.apache.hadoop.hbase.ServerName;
85 import org.apache.hadoop.hbase.TableName;
86 import org.apache.hadoop.hbase.TableNotFoundException;
87 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
88 import org.apache.hadoop.hbase.client.Admin;
89 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
90 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
91 import org.apache.hadoop.hbase.client.Connection;
92 import org.apache.hadoop.hbase.client.ConnectionFactory;
93 import org.apache.hadoop.hbase.client.Delete;
94 import org.apache.hadoop.hbase.client.Get;
95 import org.apache.hadoop.hbase.client.Put;
96 import org.apache.hadoop.hbase.client.RegionInfo;
97 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
98 import org.apache.hadoop.hbase.client.RegionLocator;
99 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
100 import org.apache.hadoop.hbase.client.Result;
101 import org.apache.hadoop.hbase.client.ResultScanner;
102 import org.apache.hadoop.hbase.client.RowMutations;
103 import org.apache.hadoop.hbase.client.Scan;
104 import org.apache.hadoop.hbase.client.Table;
105 import org.apache.hadoop.hbase.client.TableDescriptor;
106 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
107 import org.apache.hadoop.hbase.client.TableState;
108 import org.apache.hadoop.hbase.io.FileLink;
109 import org.apache.hadoop.hbase.io.HFileLink;
110 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
111 import org.apache.hadoop.hbase.io.hfile.HFile;
112 import org.apache.hadoop.hbase.master.RegionState;
113 import org.apache.hadoop.hbase.regionserver.HRegion;
114 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
115 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
116 import org.apache.hadoop.hbase.replication.ReplicationException;
117 import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
118 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
119 import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
120 import org.apache.hadoop.hbase.security.UserProvider;
121 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
122 import org.apache.hadoop.hbase.util.HbckErrorReporter.ERROR_CODE;
123 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
124 import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
125 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
126 import org.apache.hadoop.hbase.wal.WALSplitUtil;
127 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
128 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
129 import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
130 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
131 import org.apache.hadoop.ipc.RemoteException;
132 import org.apache.hadoop.security.AccessControlException;
133 import org.apache.hadoop.security.UserGroupInformation;
134 import org.apache.hadoop.util.ReflectionUtils;
135 import org.apache.hadoop.util.Tool;
136 import org.apache.hadoop.util.ToolRunner;
137 import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
138 import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
139 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
140 import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
141 import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
142 import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
143 import org.apache.yetus.audience.InterfaceAudience;
144 import org.apache.yetus.audience.InterfaceStability;
145 import org.apache.zookeeper.KeeperException;
146 import org.slf4j.Logger;
147 import org.slf4j.LoggerFactory;
149 /**
150 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
151 * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. It does not
152 * work with hbase-2.x; it can read state but is not allowed to change state; i.e. effect 'repair'.
153 * Even though it can 'read' state, given how so much has changed in how hbase1 and hbase2 operate,
154 * it will often misread. See hbck2 (HBASE-19121) for a hbck tool for hbase2. This class is
155 * deprecated.
157 * <p>
158 * Region consistency checks verify that hbase:meta, region deployment on region
159 * servers and the state of data in HDFS (.regioninfo files) all are in
160 * accordance.
161 * <p>
162 * Table integrity checks verify that all possible row keys resolve to exactly
163 * one region of a table. This means there are no individual degenerate
164 * or backwards regions; no holes between regions; and that there are no
165 * overlapping regions.
166 * <p>
167 * The general repair strategy works in two phases:
168 * <ol>
169 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
170 * <li> Repair Region Consistency with hbase:meta and assignments
171 * </ol>
172 * <p>
173 * For table integrity repairs, the tables' region directories are scanned
174 * for .regioninfo files. Each table's integrity is then verified. If there
175 * are any orphan regions (regions with no .regioninfo files) or holes, new
176 * regions are fabricated. Backwards regions are sidelined as well as empty
177 * degenerate (endkey==startkey) regions. If there are any overlapping regions,
178 * a new region is created and all data is merged into the new region.
179 * <p>
180 * Table integrity repairs deal solely with HDFS and could potentially be done
181 * offline -- the hbase region servers or master do not need to be running.
182 * This phase can eventually be used to completely reconstruct the hbase:meta table in
183 * an offline fashion.
184 * <p>
185 * Region consistency requires three conditions -- 1) valid .regioninfo file
186 * present in an HDFS region dir, 2) valid row with .regioninfo data in META,
187 * and 3) a region is deployed only at the regionserver that it was assigned to,
188 * with proper state in the master.
189 * <p>
190 * Region consistency repairs require hbase to be online so that hbck can
191 * contact the HBase master and region servers. The hbck#connect() method must
192 * first be called successfully. Much of the region consistency information
193 * is transient and less risky to repair.
194 * <p>
195 * If hbck is run from the command line, there are a handful of arguments that
196 * can be used to limit the kinds of repairs hbck will do. See the code in
197 * {@link #printUsageAndExit()} for more details.
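 * <p>
 * For example, a read-only inspection run from the command line might look like the
 * following (illustrative only; on hbase-2.x this tool can report but not repair):
 * <pre>
 *   $ ./bin/hbase hbck -details
 * </pre>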
198 * @deprecated For removal in hbase-4.0.0. Use HBCK2 instead.
199 */
200 @Deprecated
201 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
202 @InterfaceStability.Evolving
203 public class HBaseFsck extends Configured implements Closeable {
204 public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
205 public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
206 private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
207 private static boolean rsSupportsOffline = true;
208 private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
209 private static final int DEFAULT_MAX_MERGE = 5;
212 * Here is where hbase-1.x used to keep its default hbck1 lock file.
213 * hbck1 puts a lock in place when it goes to write/make changes.
215 @InterfaceAudience.Private
216 public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
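// The lock file is written under the cluster temp dir returned by getTmpDir(), i.e.
// ${hbase.rootdir}/.tmp/hbase-hbck.lock; see FileLockCallable below for how it is created.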
217 private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
218 private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
219 private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
220 // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
221 // In HADOOP-2.6 and later, the Namenode proxy is created with a custom RetryPolicy for
222 // AlreadyBeingCreatedException, which implies a timeout on this operation of up to
223 // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
224 private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
225 private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
226 private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
227 private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
229 /**********************
230 * Internal resources
231 **********************/
232 private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
233 private ClusterMetrics status;
234 private Connection connection;
235 private Admin admin;
236 private Table meta;
237 // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
238 protected ExecutorService executor;
239 private long startMillis = EnvironmentEdgeManager.currentTime();
240 private HFileCorruptionChecker hfcc;
241 private int retcode = 0;
242 private Path HBCK_LOCK_PATH;
243 private FSDataOutputStream hbckOutFd;
244 // This lock is to prevent cleanup of balancer resources twice between
245 // ShutdownHook and the main code. We cleanup only if the connect() is
246 // successful
247 private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
249 // Unsupported options in HBase 2.0+
250 private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
251 "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
252 "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
253 "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
255 /***********
256 * Options
257 ***********/
258 private static boolean details = false; // do we display the full report
259 private long timelag = DEFAULT_TIME_LAG; // only check tables whose modtime is older than this lag
260 private static boolean forceExclusive = false; // only this hbck can modify HBase
261 private boolean fixAssignments = false; // fix assignment errors?
262 private boolean fixMeta = false; // fix meta errors?
263 private boolean checkHdfs = true; // load and check fs consistency?
264 private boolean fixHdfsHoles = false; // fix fs holes?
265 private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
266 private boolean fixHdfsOrphans = false; // fix fs orphan regions (missing .regioninfo)
267 private boolean fixTableOrphans = false; // fix fs orphan tables (missing .tableinfo)
268 private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
269 private boolean fixSplitParents = false; // fix lingering split parents
270 private boolean removeParents = false; // remove split parents
271 private boolean fixReferenceFiles = false; // fix lingering reference store file
272 private boolean fixHFileLinks = false; // fix lingering HFileLinks
273 private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
274 private boolean fixReplication = false; // fix undeleted replication queues for removed peer
275 private boolean cleanReplicationBarrier = false; // clean replication barriers of a table
276 private boolean fixAny = false; // Set to true if any of the fix is required.
278 // limit checking/fixes to listed tables, if empty attempt to check/fix all
279 // hbase:meta is always checked
280 private Set<TableName> tablesIncluded = new HashSet<>();
281 private TableName cleanReplicationBarrierTable;
282 private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
283 // maximum number of overlapping regions to sideline
284 private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
285 private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
286 private Path sidelineDir = null;
288 private boolean rerun = false; // if we tried to fix something, rerun hbck
289 private static boolean summary = false; // if we want to print less output
290 private boolean checkMetaOnly = false;
291 private boolean checkRegionBoundaries = false;
292 private boolean ignorePreCheckPermission = false; // if true, skip the filesystem permission pre-check
294 /*********
295 * State
296 *********/
297 final private HbckErrorReporter errors;
298 int fixes = 0;
301 * This map contains the state of all hbck items. It maps from encoded region
302 * name to HbckRegionInfo structure. The information contained in HbckRegionInfo is used
303 * to detect and correct consistency (hdfs/meta/deployment) problems.
305 private TreeMap<String, HbckRegionInfo> regionInfoMap = new TreeMap<>();
306 // Empty regioninfo qualifiers in hbase:meta
307 private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();
310 * This map from Tablename -> TableInfo contains the structures necessary to
311 * detect table consistency problems (holes, dupes, overlaps). It is sorted
312 * to prevent dupes.
314 * If tablesIncluded is empty, this map contains all tables.
315 * Otherwise, it contains only meta tables and tables in tablesIncluded,
316 * unless checkMetaOnly is specified, in which case, it contains only
317 * the meta table
319 private SortedMap<TableName, HbckTableInfo> tablesInfo = new ConcurrentSkipListMap<>();
322 * When initially looking at HDFS, we attempt to find any orphaned data.
324 private List<HbckRegionInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<>());
326 private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
327 private Map<TableName, TableState> tableStates = new HashMap<>();
328 private final RetryCounterFactory lockFileRetryCounterFactory;
329 private final RetryCounterFactory createZNodeRetryCounterFactory;
331 private Map<TableName, Set<String>> skippedRegions = new HashMap<>();
333 private ZKWatcher zkw = null;
334 private String hbckEphemeralNodePath = null;
335 private boolean hbckZodeCreated = false;
338 * Constructor
340 * @param conf Configuration object
341 * @throws MasterNotRunningException if the master is not running
342 * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
344 public HBaseFsck(Configuration conf) throws IOException, ClassNotFoundException {
345 this(conf, createThreadPool(conf));
348 private static ExecutorService createThreadPool(Configuration conf) {
349 int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
350 return new ScheduledThreadPoolExecutor(numThreads,
351 new ThreadFactoryBuilder().setNameFormat("hbasefsck-pool-%d").setDaemon(true)
352 .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
356 * Constructor
358 * @param conf
359 * Configuration object
360 * @throws MasterNotRunningException
361 * if the master is not running
362 * @throws ZooKeeperConnectionException
363 * if unable to connect to ZooKeeper
365 public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
366 ZooKeeperConnectionException, IOException, ClassNotFoundException {
367 super(conf);
368 errors = getErrorReporter(getConf());
369 this.executor = exec;
370 lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
371 createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
372 zkw = createZooKeeperWatcher();
376 * @return A retry counter factory configured for retrying lock file creation.
378 public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
379 return new RetryCounterFactory(
380 conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
381 conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
382 DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
383 conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
384 DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
388 * @return A retry counter factory configured for retrying znode creation.
390 private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
391 return new RetryCounterFactory(
392 conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
393 conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
394 DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
395 conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
396 DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
400 * @return the tmp dir this tool writes to.
402 @InterfaceAudience.Private
403 public static Path getTmpDir(Configuration conf) throws IOException {
404 return new Path(CommonFSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
407 private static class FileLockCallable implements Callable<FSDataOutputStream> {
408 RetryCounter retryCounter;
409 private final Configuration conf;
410 private Path hbckLockPath = null;
412 public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
413 this.retryCounter = retryCounter;
414 this.conf = conf;
418 * @return Will be <code>null</code> unless you call {@link #call()}
420 Path getHbckLockPath() {
421 return this.hbckLockPath;
424 @Override
425 public FSDataOutputStream call() throws IOException {
426 try {
427 FileSystem fs = CommonFSUtils.getCurrentFileSystem(this.conf);
428 FsPermission defaultPerms =
429 CommonFSUtils.getFilePermissions(fs, this.conf, HConstants.DATA_FILE_UMASK_KEY);
430 Path tmpDir = getTmpDir(conf);
431 this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
432 fs.mkdirs(tmpDir);
433 final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
434 out.writeBytes(InetAddress.getLocalHost().toString());
435 // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
436 out.writeBytes(" Written by an hbase-2.x Master to block an " +
437 "attempt by an hbase-1.x HBCK tool making modification to state. " +
438 "See 'HBCK must match HBase server version' in the hbase refguide.");
439 out.flush();
440 return out;
441 } catch(RemoteException e) {
442 if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
443 return null;
444 } else {
445 throw e;
450 private FSDataOutputStream createFileWithRetries(final FileSystem fs,
451 final Path hbckLockFilePath, final FsPermission defaultPerms)
452 throws IOException {
453 IOException exception = null;
454 do {
455 try {
456 return CommonFSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
457 } catch (IOException ioe) {
458 LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
459 + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
460 + retryCounter.getMaxAttempts());
461 LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
462 ioe);
463 try {
464 exception = ioe;
465 retryCounter.sleepUntilNextRetry();
466 } catch (InterruptedException ie) {
467 throw (InterruptedIOException) new InterruptedIOException(
468 "Can't create lock file " + hbckLockFilePath.getName())
469 .initCause(ie);
472 } while (retryCounter.shouldRetry());
474 throw exception;
479 * This method maintains a lock using a file. If the creation fails we return null
481 * @return FSDataOutputStream object corresponding to the newly opened lock file
482 * @throws IOException if IO failure occurs
484 public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
485 RetryCounter retryCounter) throws IOException {
486 FileLockCallable callable = new FileLockCallable(conf, retryCounter);
487 ExecutorService executor = Executors.newFixedThreadPool(1);
488 FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
489 executor.execute(futureTask);
490 final int timeoutInSeconds = conf.getInt(
491 "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
492 FSDataOutputStream stream = null;
493 try {
494 stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
495 } catch (ExecutionException ee) {
496 LOG.warn("Encountered exception when opening lock file", ee);
497 } catch (InterruptedException ie) {
498 LOG.warn("Interrupted when opening lock file", ie);
499 Thread.currentThread().interrupt();
500 } catch (TimeoutException exception) {
501 // took too long to obtain lock
502 LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
503 futureTask.cancel(true);
504 } finally {
505 executor.shutdownNow();
507 return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
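/**
 * Release the hbck exclusive lock: close the lock file stream and delete the
 * hbase-hbck.lock file, retrying per the lock-file retry counter if the delete fails.
 */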
510 private void unlockHbck() {
511 if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
512 RetryCounter retryCounter = lockFileRetryCounterFactory.create();
513 do {
514 try {
515 Closeables.close(hbckOutFd, true);
516 CommonFSUtils.delete(CommonFSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
517 LOG.info("Finishing hbck");
518 return;
519 } catch (IOException ioe) {
520 LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
521 + (retryCounter.getAttemptTimes() + 1) + " of "
522 + retryCounter.getMaxAttempts());
523 LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
524 try {
525 retryCounter.sleepUntilNextRetry();
526 } catch (InterruptedException ie) {
527 Thread.currentThread().interrupt();
528 LOG.warn("Interrupted while deleting lock file" +
529 HBCK_LOCK_PATH);
530 return;
533 } while (retryCounter.shouldRetry());
538 * To repair region consistency, one must call connect() in order to repair
539 * online state.
541 public void connect() throws IOException {
543 if (isExclusive()) {
544 // Grab the lock
545 Pair<Path, FSDataOutputStream> pair =
546 checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
547 HBCK_LOCK_PATH = pair.getFirst();
548 this.hbckOutFd = pair.getSecond();
549 if (hbckOutFd == null) {
550 setRetCode(-1);
551 LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
552 "[If you are sure no other instance is running, delete the lock file " +
553 HBCK_LOCK_PATH + " and rerun the tool]");
554 throw new IOException("Duplicate hbck - Abort");
557 // Make sure to cleanup the lock
558 hbckLockCleanup.set(true);
562 // Add a shutdown hook to this thread, in case user tries to
563 // kill the hbck with a ctrl-c, we want to cleanup the lock so that
564 // it is available for further calls
565 Runtime.getRuntime().addShutdownHook(new Thread() {
566 @Override
567 public void run() {
568 IOUtils.closeQuietly(HBaseFsck.this, e -> LOG.warn("", e));
569 cleanupHbckZnode();
570 unlockHbck();
574 LOG.info("Launching hbck");
576 connection = ConnectionFactory.createConnection(getConf());
577 admin = connection.getAdmin();
578 meta = connection.getTable(TableName.META_TABLE_NAME);
579 status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS,
580 Option.DEAD_SERVERS, Option.MASTER, Option.BACKUP_MASTERS,
581 Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
585 * Get deployed regions according to the region servers.
587 private void loadDeployedRegions() throws IOException, InterruptedException {
588 // From the master, get a list of all known live region servers
589 Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
590 errors.print("Number of live region servers: " + regionServers.size());
591 if (details) {
592 for (ServerName rsinfo: regionServers) {
593 errors.print(" " + rsinfo.getServerName());
597 // From the master, get a list of all dead region servers
598 Collection<ServerName> deadRegionServers = status.getDeadServerNames();
599 errors.print("Number of dead region servers: " + deadRegionServers.size());
600 if (details) {
601 for (ServerName name: deadRegionServers) {
602 errors.print(" " + name);
606 // Print the current master name and state
607 errors.print("Master: " + status.getMasterName());
609 // Print the list of all backup masters
610 Collection<ServerName> backupMasters = status.getBackupMasterNames();
611 errors.print("Number of backup masters: " + backupMasters.size());
612 if (details) {
613 for (ServerName name: backupMasters) {
614 errors.print(" " + name);
618 errors.print("Average load: " + status.getAverageLoad());
619 errors.print("Number of requests: " + status.getRequestCount());
620 errors.print("Number of regions: " + status.getRegionCount());
622 List<RegionState> rits = status.getRegionStatesInTransition();
623 errors.print("Number of regions in transition: " + rits.size());
624 if (details) {
625 for (RegionState state: rits) {
626 errors.print(" " + state.toDescriptiveString());
630 // Determine what's deployed
631 processRegionServers(regionServers);
635 * Clear the current state of hbck.
637 private void clearState() {
638 // Make sure regionInfo is empty before starting
639 fixes = 0;
640 regionInfoMap.clear();
641 emptyRegionInfoQualifiers.clear();
642 tableStates.clear();
643 errors.clear();
644 tablesInfo.clear();
645 orphanHdfsDirs.clear();
646 skippedRegions.clear();
650 * This repair method analyzes hbase data in hdfs and repairs it to satisfy
651 * the table integrity rules. HBase doesn't need to be online for this
652 * operation to work.
654 public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
655 // Initial pass to fix orphans.
656 if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
657 || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
658 LOG.info("Loading regioninfos HDFS");
659 // if nothing is happening this should always complete in two iterations.
660 int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
661 int curIter = 0;
662 do {
663 clearState(); // clears hbck state and resets fixes to 0.
664 // repair what's on HDFS
665 restoreHdfsIntegrity();
666 curIter++;// limit the number of iterations.
667 } while (fixes > 0 && curIter <= maxIterations);
669 // Repairs should be done in the first iteration and verification in the second.
670 // If there are more than 2 passes, something funny has happened.
671 if (curIter > 2) {
672 if (curIter == maxIterations) {
673 LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
674 + "Tables integrity may not be fully repaired!");
675 } else {
676 LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
683 * This repair method requires the cluster to be online since it contacts
684 * region servers and the masters. It makes each region's state in HDFS, in
685 * hbase:meta, and deployments consistent.
687 * @return If &gt; 0 , number of errors detected, if &lt; 0 there was an unrecoverable
688 * error. If 0, we have a clean hbase.
690 public int onlineConsistencyRepair() throws IOException, KeeperException,
691 InterruptedException {
693 // get regions according to what is online on each RegionServer
694 loadDeployedRegions();
695 // check whether hbase:meta is deployed and online
696 recordMetaRegion();
697 // Check if hbase:meta is found only once and in the right place
698 if (!checkMetaRegion()) {
699 String errorMsg = "hbase:meta table is not consistent. ";
700 if (shouldFixAssignments()) {
701 errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
702 } else {
703 errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
705 errors.reportError(errorMsg + " Exiting...");
706 return -2;
708 // Do not proceed with further consistency checks for tables when hbase:meta itself is not consistent.
709 LOG.info("Loading regionsinfo from the hbase:meta table");
710 boolean success = loadMetaEntries();
711 if (!success) return -1;
713 // Empty cells in hbase:meta?
714 reportEmptyMetaCells();
716 // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
717 if (shouldFixEmptyMetaCells()) {
718 fixEmptyMetaCells();
721 // get a list of all tables that have not changed recently.
722 if (!checkMetaOnly) {
723 reportTablesInFlux();
726 // Get disabled tables states
727 loadTableStates();
729 // load regiondirs and regioninfos from HDFS
730 if (shouldCheckHdfs()) {
731 LOG.info("Loading region directories from HDFS");
732 loadHdfsRegionDirs();
733 LOG.info("Loading region information from HDFS");
734 loadHdfsRegionInfos();
737 // fix the orphan tables
738 fixOrphanTables();
740 LOG.info("Checking and fixing region consistency");
741 // Check and fix consistency
742 checkAndFixConsistency();
744 // Check integrity (does not fix)
745 checkIntegrity();
746 return errors.getErrorList().size();
750 * This method maintains an ephemeral znode. If the creation fails we return false or throw
751 * an exception.
753 * @return true if creating znode succeeds; false otherwise
754 * @throws IOException if IO failure occurs
756 private boolean setMasterInMaintenanceMode() throws IOException {
757 RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
758 hbckEphemeralNodePath = ZNodePaths.joinZNode(
759 zkw.getZNodePaths().masterMaintZNode,
760 "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
761 do {
762 try {
763 hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
764 if (hbckZodeCreated) {
765 break;
767 } catch (KeeperException e) {
768 if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
769 throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
771 // fall through and retry
774 LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try=" +
775 (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
777 try {
778 retryCounter.sleepUntilNextRetry();
779 } catch (InterruptedException ie) {
780 throw (InterruptedIOException) new InterruptedIOException(
781 "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
783 } while (retryCounter.shouldRetry());
784 return hbckZodeCreated;
787 private void cleanupHbckZnode() {
788 try {
789 if (zkw != null && hbckZodeCreated) {
790 ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
791 hbckZodeCreated = false;
793 } catch (KeeperException e) {
794 // Ignore
795 if (!e.code().equals(KeeperException.Code.NONODE)) {
796 LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
802 * Contacts the master and prints out cluster-wide information
803 * @return 0 on success, non-zero on failure
805 public int onlineHbck()
806 throws IOException, KeeperException, InterruptedException, ReplicationException {
807 // print hbase server version
808 errors.print("Version: " + status.getHBaseVersion());
810 // Clean start
811 clearState();
812 // Do offline check and repair first
813 offlineHdfsIntegrityRepair();
814 offlineReferenceFileRepair();
815 offlineHLinkFileRepair();
816 // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
817 // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
818 // is better to set Master into maintenance mode during online hbck.
820 if (!setMasterInMaintenanceMode()) {
821 LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
822 + "error. Please run HBCK multiple times to reduce the chance of transient error.");
825 onlineConsistencyRepair();
827 if (checkRegionBoundaries) {
828 checkRegionBoundaries();
831 checkAndFixReplication();
833 cleanReplicationBarrier();
835 // Remove the hbck znode
836 cleanupHbckZnode();
838 // Remove the hbck lock
839 unlockHbck();
841 // Print table summary
842 printTableSummary(tablesInfo);
843 return errors.summarize();
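/**
 * Extract just the row from a serialized key: the first {@link Bytes#SIZEOF_SHORT} bytes
 * encode the row length, followed by that many row bytes.
 */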
846 public static byte[] keyOnly(byte[] b) {
847 if (b == null)
848 return b;
849 int rowlength = Bytes.toShort(b, 0);
850 byte[] result = new byte[rowlength];
851 System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
852 return result;
855 @Override
856 public void close() throws IOException {
857 try {
858 cleanupHbckZnode();
859 unlockHbck();
860 } catch (Exception io) {
861 LOG.warn(io.toString(), io);
862 } finally {
863 if (zkw != null) {
864 zkw.close();
865 zkw = null;
867 IOUtils.closeQuietly(admin, e -> LOG.warn("", e));
868 IOUtils.closeQuietly(meta, e -> LOG.warn("", e));
869 IOUtils.closeQuietly(connection, e -> LOG.warn("", e));
873 private static class RegionBoundariesInformation {
874 public byte [] regionName;
875 public byte [] metaFirstKey;
876 public byte [] metaLastKey;
877 public byte [] storesFirstKey;
878 public byte [] storesLastKey;
879 @Override
880 public String toString () {
881 return "regionName=" + Bytes.toStringBinary(regionName) +
882 "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
883 "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
884 "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
885 "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
889 public void checkRegionBoundaries() {
890 try {
891 ByteArrayComparator comparator = new ByteArrayComparator();
892 List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
893 final RegionBoundariesInformation currentRegionBoundariesInformation =
894 new RegionBoundariesInformation();
895 Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
896 for (RegionInfo regionInfo : regions) {
897 Path tableDir = CommonFSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
898 currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
899 // For each region, get the start and stop key from the META and compare them to the
900 // same information from the Stores.
901 Path path = new Path(tableDir, regionInfo.getEncodedName());
902 FileSystem fs = path.getFileSystem(getConf());
903 FileStatus[] files = fs.listStatus(path);
904 // For all the column families in this region...
905 byte[] storeFirstKey = null;
906 byte[] storeLastKey = null;
907 for (FileStatus file : files) {
908 String fileName = file.getPath().toString();
909 fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
910 if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
911 FileStatus[] storeFiles = fs.listStatus(file.getPath());
912 // For all the stores in this column family.
913 for (FileStatus storeFile : storeFiles) {
914 HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(),
915 CacheConfig.DISABLED, true, getConf());
916 if ((reader.getFirstKey() != null)
917 && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
918 ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) {
919 storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey().get()).getKey();
921 if ((reader.getLastKey() != null)
922 && ((storeLastKey == null) || (comparator.compare(storeLastKey,
923 ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey())) < 0)) {
924 storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey();
926 reader.close();
930 currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
931 currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
932 currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
933 currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
934 if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
935 currentRegionBoundariesInformation.metaFirstKey = null;
936 if (currentRegionBoundariesInformation.metaLastKey.length == 0)
937 currentRegionBoundariesInformation.metaLastKey = null;
939 // For a region to be correct, we need the META start key to be smaller or equal to the
940 // smallest start key from all the stores, and the start key from the next META entry to
941 // be bigger than the last key from all the current stores. First region start key is null;
942 // Last region end key is null; some regions can be empty and not have any store.
944 boolean valid = true;
945 // Checking start key.
946 if ((currentRegionBoundariesInformation.storesFirstKey != null)
947 && (currentRegionBoundariesInformation.metaFirstKey != null)) {
948 valid = valid
949 && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
950 currentRegionBoundariesInformation.metaFirstKey) >= 0;
952 // Checking stop key.
953 if ((currentRegionBoundariesInformation.storesLastKey != null)
954 && (currentRegionBoundariesInformation.metaLastKey != null)) {
955 valid = valid
956 && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
957 currentRegionBoundariesInformation.metaLastKey) < 0;
959 if (!valid) {
960 errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
961 tablesInfo.get(regionInfo.getTable()));
962 LOG.warn("Region's boundaries not aligned between stores and META for:");
963 LOG.warn(Objects.toString(currentRegionBoundariesInformation));
966 } catch (IOException e) {
967 LOG.error(e.toString(), e);
972 * Iterates through the list of all orphan/invalid regiondirs.
974 private void adoptHdfsOrphans(Collection<HbckRegionInfo> orphanHdfsDirs) throws IOException {
975 for (HbckRegionInfo hi : orphanHdfsDirs) {
976 LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
977 adoptHdfsOrphan(hi);
982 * Orphaned regions are regions without a .regioninfo file in them. We "adopt"
983 * these orphans by creating a new region, and moving the column families,
984 * recovered edits, WALs, into the new region dir. We determine the region
985 * startkey and endkeys by looking at all of the hfiles inside the column
986 * families to identify the min and max keys. The resulting region will
987 * likely violate table integrity but will be dealt with by merging
988 * overlapping regions.
990 @SuppressWarnings("deprecation")
991 private void adoptHdfsOrphan(HbckRegionInfo hi) throws IOException {
992 Path p = hi.getHdfsRegionDir();
993 FileSystem fs = p.getFileSystem(getConf());
994 FileStatus[] dirs = fs.listStatus(p);
995 if (dirs == null) {
996 LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
997 p + ". This dir could probably be deleted.");
998 return ;
1001 TableName tableName = hi.getTableName();
1002 HbckTableInfo tableInfo = tablesInfo.get(tableName);
1003 Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
1004 TableDescriptor template = tableInfo.getTableDescriptor();
1006 // find min and max key values
1007 Pair<byte[],byte[]> orphanRegionRange = null;
1008 for (FileStatus cf : dirs) {
1009 String cfName= cf.getPath().getName();
1010 // TODO Figure out what the special dirs are
1011 if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
1013 FileStatus[] hfiles = fs.listStatus(cf.getPath());
1014 for (FileStatus hfile : hfiles) {
1015 byte[] start, end;
1016 HFile.Reader hf = null;
1017 try {
1018 hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
1019 Optional<Cell> startKv = hf.getFirstKey();
1020 start = CellUtil.cloneRow(startKv.get());
1021 Optional<Cell> endKv = hf.getLastKey();
1022 end = CellUtil.cloneRow(endKv.get());
1023 } catch (IOException ioe) {
1024 LOG.warn("Problem reading orphan file " + hfile + ", skipping");
1025 continue;
1026 } catch (NullPointerException ioe) {
1027 LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
1028 continue;
1029 } finally {
1030 if (hf != null) {
1031 hf.close();
1035 // expand the range to include the range of all hfiles
1036 if (orphanRegionRange == null) {
1037 // first range
1038 orphanRegionRange = new Pair<>(start, end);
1039 } else {
1040 // TODO add test
1042 // expand range only if the hfile is wider.
1043 if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1044 orphanRegionRange.setFirst(start);
1046 if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
1047 orphanRegionRange.setSecond(end);
1052 if (orphanRegionRange == null) {
1053 LOG.warn("No data in dir " + p + ", sidelining data");
1054 fixes++;
1055 sidelineRegionDir(fs, hi);
1056 return;
1058 LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
1059 Bytes.toString(orphanRegionRange.getSecond()) + ")");
1061 // create new region on hdfs. move data into place.
1062 RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
1063 .setStartKey(orphanRegionRange.getFirst())
1064 .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1]))
1065 .build();
1066 LOG.info("Creating new region : " + regionInfo);
1067 HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
1068 Path target = region.getRegionFileSystem().getRegionDir();
1070 // rename all the data to new region
1071 mergeRegionDirs(target, hi);
1072 fixes++;
1076 * This method determines if there are table integrity errors in HDFS. If
1077 * there are errors and the appropriate "fix" options are enabled, the method
1078 * will first correct orphan regions making them into legit regiondirs, and
1079 * then reload to merge potentially overlapping regions.
1081 * @return number of table integrity errors found
1083 private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1084 // Determine what's on HDFS
1085 LOG.info("Loading HBase regioninfo from HDFS...");
1086 loadHdfsRegionDirs(); // populating regioninfo table.
1088 int errs = errors.getErrorList().size();
1089 // First time just get suggestions.
1090 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1091 checkHdfsIntegrity(false, false);
1093 if (errors.getErrorList().size() == errs) {
1094 LOG.info("No integrity errors. We are done with this phase. Glorious.");
1095 return 0;
1098 if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1099 adoptHdfsOrphans(orphanHdfsDirs);
1100 // TODO optimize by incrementally adding instead of reloading.
1103 // Make sure there are no holes now.
1104 if (shouldFixHdfsHoles()) {
1105 clearState(); // this also resets # fixes.
1106 loadHdfsRegionDirs();
1107 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1108 tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1111 // Now we fix overlaps
1112 if (shouldFixHdfsOverlaps()) {
1113 // second pass we fix overlaps.
1114 clearState(); // this also resets # fixes.
1115 loadHdfsRegionDirs();
1116 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1117 tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1120 return errors.getErrorList().size();
1124 * Scan all the store file names to find any lingering reference files,
1125 * which refer to non-existent files. If the "fix" option is enabled,
1126 * any lingering reference file will be sidelined if found.
1127 * <p>
1128 * A lingering reference file prevents a region from opening. It has to
1129 * be fixed before a cluster can start properly.
1131 private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1132 clearState();
1133 Configuration conf = getConf();
1134 Path hbaseRoot = CommonFSUtils.getRootDir(conf);
1135 FileSystem fs = hbaseRoot.getFileSystem(conf);
1136 LOG.info("Computing mapping of all store files");
1137 Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1138 new FSUtils.ReferenceFileFilter(fs), executor, errors);
1139 errors.print("");
1140 LOG.info("Validating mapping using HDFS state");
1141 for (Path path: allFiles.values()) {
1142 Path referredToFile = StoreFileInfo.getReferredToFile(path);
1143 if (fs.exists(referredToFile)) continue; // good, expected
1145 // Found a lingering reference file
1146 errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1147 "Found lingering reference file " + path);
1148 if (!shouldFixReferenceFiles()) continue;
1150 // Now, trying to fix it since requested
1151 boolean success = false;
1152 String pathStr = path.toString();
1154 // A reference file path should be like
1155 // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1156 // Up 5 directories to get the root folder.
1157 // So the file will be sidelined to a similar folder structure.
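// Hypothetical example: a reference file at
//   ${hbase.rootdir}/data/default/t1/<region>/f1/<hfile>.<parent-region>
// would be sidelined to
//   ${sidelinedir}/data/default/t1/<region>/f1/<hfile>.<parent-region>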
1158 int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1159 for (int i = 0; index > 0 && i < 5; i++) {
1160 index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1162 if (index > 0) {
1163 Path rootDir = getSidelineDir();
1164 Path dst = new Path(rootDir, pathStr.substring(index + 1));
1165 fs.mkdirs(dst.getParent());
1166 LOG.info("Trying to sideline reference file "
1167 + path + " to " + dst);
1168 setShouldRerun();
1170 success = fs.rename(path, dst);
1171 debugLsr(dst);
1174 if (!success) {
1175 LOG.error("Failed to sideline reference file " + path);
1181 * Scan all the store file names to find any lingering HFileLink files,
1182 * which refer to non-existent files. If the "fix" option is enabled,
1183 * any lingering HFileLink file will be sidelined if found.
1185 private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1186 Configuration conf = getConf();
1187 Path hbaseRoot = CommonFSUtils.getRootDir(conf);
1188 FileSystem fs = hbaseRoot.getFileSystem(conf);
1189 LOG.info("Computing mapping of all link files");
1190 Map<String, Path> allFiles = FSUtils
1191 .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
1192 errors.print("");
1194 LOG.info("Validating mapping using HDFS state");
1195 for (Path path : allFiles.values()) {
1196 // building HFileLink object to gather locations
1197 HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1198 if (actualLink.exists(fs)) continue; // good, expected
1200 // Found a lingering HFileLink
1201 errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1202 if (!shouldFixHFileLinks()) continue;
1204 // Now, trying to fix it since requested
1205 setShouldRerun();
1207 // An HFileLink path should be like
1208 // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
1209 // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1210 boolean success = sidelineFile(fs, hbaseRoot, path);
1212 if (!success) {
1213 LOG.error("Failed to sideline HFileLink file " + path);
1216 // An HFileLink backreference path should be like
1217 // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
1218 // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1219 Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
1220 .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
1221 HFileLink.getReferencedRegionName(path.getName().toString()),
1222 path.getParent().getName()),
1223 HFileLink.getReferencedHFileName(path.getName().toString()));
1224 success = sidelineFile(fs, hbaseRoot, backRefPath);
1226 if (!success) {
1227 LOG.error("Failed to sideline HFileLink backreference file " + path);
1232 private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1233 URI uri = hbaseRoot.toUri().relativize(path.toUri());
1234 if (uri.isAbsolute()) return false;
1235 String relativePath = uri.getPath();
1236 Path rootDir = getSidelineDir();
1237 Path dst = new Path(rootDir, relativePath);
1238 boolean pathCreated = fs.mkdirs(dst.getParent());
1239 if (!pathCreated) {
1240 LOG.error("Failed to create path: " + dst.getParent());
1241 return false;
1243 LOG.info("Trying to sideline file " + path + " to " + dst);
1244 return fs.rename(path, dst);
1248 * TODO -- need to add tests for this.
1250 private void reportEmptyMetaCells() {
1251 errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1252 emptyRegionInfoQualifiers.size());
1253 if (details) {
1254 for (Result r: emptyRegionInfoQualifiers) {
1255 errors.print(" " + r);
1261 * TODO -- need to add tests for this.
1263 private void reportTablesInFlux() {
1264 AtomicInteger numSkipped = new AtomicInteger(0);
1265 TableDescriptor[] allTables = getTables(numSkipped);
1266 errors.print("Number of Tables: " + allTables.length);
1267 if (details) {
1268 if (numSkipped.get() > 0) {
1269 errors.detail("Number of Tables in flux: " + numSkipped.get());
1271 for (TableDescriptor td : allTables) {
1272 errors.detail(" Table: " + td.getTableName() + "\t" +
1273 (td.isReadOnly() ? "ro" : "rw") + "\t" +
1274 (td.isMetaRegion() ? "META" : " ") + "\t" +
1275 " families: " + td.getColumnFamilyCount());
1280 public HbckErrorReporter getErrors() {
1281 return errors;
1285 * Populate hbi's from regionInfos loaded from file system.
1287 private SortedMap<TableName, HbckTableInfo> loadHdfsRegionInfos()
1288 throws IOException, InterruptedException {
1289 tablesInfo.clear(); // regenerating the data
1290 // generate region split structure
1291 Collection<HbckRegionInfo> hbckRegionInfos = regionInfoMap.values();
1293 // Parallelized read of .regioninfo files.
1294 List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckRegionInfos.size());
1295 List<Future<Void>> hbiFutures;
1297 for (HbckRegionInfo hbi : hbckRegionInfos) {
1298 WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1299 hbis.add(work);
1302 // Submit and wait for completion
1303 hbiFutures = executor.invokeAll(hbis);
1305 for(int i=0; i<hbiFutures.size(); i++) {
1306 WorkItemHdfsRegionInfo work = hbis.get(i);
1307 Future<Void> f = hbiFutures.get(i);
1308 try {
1309 f.get();
1310 } catch(ExecutionException e) {
1311 LOG.warn("Failed to read .regioninfo file for region " +
1312 work.hbi.getRegionNameAsString(), e.getCause());
1316 Path hbaseRoot = CommonFSUtils.getRootDir(getConf());
1317 FileSystem fs = hbaseRoot.getFileSystem(getConf());
1318 // serialized table info gathering.
1319 for (HbckRegionInfo hbi: hbckRegionInfos) {
1321 if (hbi.getHdfsHRI() == null) {
1322 // was an orphan
1323 continue;
1327 // get table name from hdfs, populate various HBaseFsck tables.
1328 TableName tableName = hbi.getTableName();
1329 if (tableName == null) {
1330 // There was an entry in hbase:meta not in the HDFS?
1331 LOG.warn("tableName was null for: " + hbi);
1332 continue;
1335 HbckTableInfo modTInfo = tablesInfo.get(tableName);
1336 if (modTInfo == null) {
1337 // only executed once per table.
1338 modTInfo = new HbckTableInfo(tableName, this);
1339 tablesInfo.put(tableName, modTInfo);
1340 try {
1341 TableDescriptor htd =
1342 FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1343 modTInfo.htds.add(htd);
1344 } catch (IOException ioe) {
1345 if (!orphanTableDirs.containsKey(tableName)) {
1346 LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1347 //should only report once for each table
1348 errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1349 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1350 Set<String> columns = new HashSet<>();
1351 orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1355 if (!hbi.isSkipChecks()) {
1356 modTInfo.addRegionInfo(hbi);
1360 loadTableInfosForTablesWithNoRegion();
1361 errors.print("");
1363 return tablesInfo;
1367 * To get the column family list according to the column family dirs
1368 * @param columns
1369 * @param hbi
1370 * @return a set of column families
1371 * @throws IOException
1373 private Set<String> getColumnFamilyList(Set<String> columns, HbckRegionInfo hbi)
1374 throws IOException {
1375 Path regionDir = hbi.getHdfsRegionDir();
1376 FileSystem fs = regionDir.getFileSystem(getConf());
1377 FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1378 for (FileStatus subdir : subDirs) {
1379 String columnfamily = subdir.getPath().getName();
1380 columns.add(columnfamily);
1382 return columns;
1386 * To fabricate a .tableinfo file with the following contents:<br>
1387 * 1. the correct tablename <br>
1388 * 2. the correct colfamily list<br>
1389 * 3. the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
1390 * @throws IOException
1392 private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1393 Set<String> columns) throws IOException {
1394 if (columns ==null || columns.isEmpty()) return false;
1395 TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
1396 for (String columnfamily : columns) {
1397 builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnfamily));
1399 fstd.createTableDescriptor(builder.build(), true);
1400 return true;
1404 * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br>
1405 * @throws IOException
1407 public void fixEmptyMetaCells() throws IOException {
1408 if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1409 LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1410 for (Result region : emptyRegionInfoQualifiers) {
1411 deleteMetaRegion(region.getRow());
1412 errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1414 emptyRegionInfoQualifiers.clear();
1419 * To fix an orphan table by creating a .tableinfo file under its tableDir: <br>
1420 * 1. if the table descriptor is cached, recover the .tableinfo accordingly <br>
1421 * 2. else create a default .tableinfo file with the following items:<br>
1422 * &nbsp;2.1 the correct tablename <br>
1423 * &nbsp;2.2 the correct colfamily list<br>
1424 * &nbsp;2.3 the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
1425 * @throws IOException
1427 public void fixOrphanTables() throws IOException {
1428 if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1430 List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
1431 tmpList.addAll(orphanTableDirs.keySet());
1432 TableDescriptor[] htds = getTableDescriptors(tmpList);
1433 Iterator<Entry<TableName, Set<String>>> iter =
1434 orphanTableDirs.entrySet().iterator();
1435 int j = 0;
1436 int numFailedCase = 0;
1437 FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1438 while (iter.hasNext()) {
1439 Entry<TableName, Set<String>> entry =
1440 iter.next();
1441 TableName tableName = entry.getKey();
1442 LOG.info("Trying to fix orphan table error: " + tableName);
1443 if (j < htds.length) {
1444 if (tableName.equals(htds[j].getTableName())) {
1445 TableDescriptor htd = htds[j];
1446 LOG.info("fixing orphan table: " + tableName + " from cache");
1447 fstd.createTableDescriptor(htd, true);
1448 j++;
1449 iter.remove();
1451 } else {
1452 if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1453 LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1454 LOG.warn("Strongly recommend to modify the TableDescriptor if necessary for: " + tableName);
1455 iter.remove();
1456 } else {
1457             LOG.error("Unable to create default .tableinfo for " + tableName + " because column family information is missing");
1458 numFailedCase++;
1461 fixes++;
1464 if (orphanTableDirs.isEmpty()) {
1465 // all orphanTableDirs are luckily recovered
1466 // re-run doFsck after recovering the .tableinfo file
1467 setShouldRerun();
1468         LOG.warn("It is strongly recommended to manually re-run hbck after all orphanTableDirs have been fixed");
1469 } else if (numFailedCase > 0) {
1470 LOG.error("Failed to fix " + numFailedCase
1471 + " OrphanTables with default .tableinfo files");
1475 //cleanup the list
1476 orphanTableDirs.clear();
1481 * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1483 private void logParallelMerge() {
1484 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1485       LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to" +
1486 " false to run serially.");
1487 } else {
1488       LOG.info("Handling overlap merges serially. Set hbasefsck.overlap.merge.parallel to" +
1489 " true to run in parallel.");
1493 private SortedMap<TableName, HbckTableInfo> checkHdfsIntegrity(boolean fixHoles,
1494 boolean fixOverlaps) throws IOException {
1495 LOG.info("Checking HBase region split map from HDFS data...");
1496 logParallelMerge();
1497 for (HbckTableInfo tInfo : tablesInfo.values()) {
1498 TableIntegrityErrorHandler handler;
1499 if (fixHoles || fixOverlaps) {
1500 handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1501 fixHoles, fixOverlaps);
1502 } else {
1503 handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1505 if (!tInfo.checkRegionChain(handler)) {
1506 // should dump info as well.
1507 errors.report("Found inconsistency in table " + tInfo.getName());
1510 return tablesInfo;
1513 Path getSidelineDir() throws IOException {
1514 if (sidelineDir == null) {
1515 Path hbaseDir = CommonFSUtils.getRootDir(getConf());
1516 Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1517 sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1518 + startMillis);
1520 return sidelineDir;
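  // For example, with the defaults above sidelined data ends up under a path of the form
  //   <hbase.rootdir>/<HBCK_SIDELINEDIR_NAME>/<root dir name>-<startMillis>
  // such as hdfs://nn/hbase/.hbck/hbase-1631022345000 (illustrative only; the actual
  // sideline directory name constant is defined in HConstants).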
1524 * Sideline a region dir (instead of deleting it)
1526 Path sidelineRegionDir(FileSystem fs, HbckRegionInfo hi) throws IOException {
1527 return sidelineRegionDir(fs, null, hi);
1531 * Sideline a region dir (instead of deleting it)
1533 * @param parentDir if specified, the region will be sidelined to folder like
1534 * {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
1535 * similar regions sidelined, for example, those regions should be bulk loaded back later
1536 * on. If NULL, it is ignored.
1538 Path sidelineRegionDir(FileSystem fs,
1539 String parentDir, HbckRegionInfo hi) throws IOException {
1540 TableName tableName = hi.getTableName();
1541 Path regionDir = hi.getHdfsRegionDir();
1543 if (!fs.exists(regionDir)) {
1544 LOG.warn("No previous " + regionDir + " exists. Continuing.");
1545 return null;
1548 Path rootDir = getSidelineDir();
1549 if (parentDir != null) {
1550 rootDir = new Path(rootDir, parentDir);
1552     Path sidelineTableDir = CommonFSUtils.getTableDir(rootDir, tableName);
1553 Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1554 fs.mkdirs(sidelineRegionDir);
1555 boolean success = false;
1556 FileStatus[] cfs = fs.listStatus(regionDir);
1557 if (cfs == null) {
1558 LOG.info("Region dir is empty: " + regionDir);
1559 } else {
1560 for (FileStatus cf : cfs) {
1561 Path src = cf.getPath();
1562 Path dst = new Path(sidelineRegionDir, src.getName());
1563 if (fs.isFile(src)) {
1564 // simple file
1565 success = fs.rename(src, dst);
1566 if (!success) {
1567 String msg = "Unable to rename file " + src + " to " + dst;
1568 LOG.error(msg);
1569 throw new IOException(msg);
1571 continue;
1574 // is a directory.
1575 fs.mkdirs(dst);
1577 LOG.info("Sidelining files from " + src + " into containing region " + dst);
1578 // FileSystem.rename is inconsistent with directories -- if the
1579 // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1580 // it moves the src into the dst dir resulting in (foo/a/b). If
1581       // the dst does not exist, and the src is a dir, src becomes dst. (foo/b)
1582 FileStatus[] hfiles = fs.listStatus(src);
1583 if (hfiles != null && hfiles.length > 0) {
1584 for (FileStatus hfile : hfiles) {
1585 success = fs.rename(hfile.getPath(), dst);
1586 if (!success) {
1587             String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1588 LOG.error(msg);
1589 throw new IOException(msg);
1593 LOG.debug("Sideline directory contents:");
1594 debugLsr(sidelineRegionDir);
1598 LOG.info("Removing old region dir: " + regionDir);
1599 success = fs.delete(regionDir, true);
1600 if (!success) {
1601 String msg = "Unable to delete dir " + regionDir;
1602 LOG.error(msg);
1603 throw new IOException(msg);
1605 return sidelineRegionDir;
1609    * Load the table states from hbase:meta into the local map.
1610    * @throws IOException
1613 private void loadTableStates()
1614 throws IOException {
1615 tableStates = MetaTableAccessor.getTableStates(connection);
1616 // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it
1617     // has no entry in the table states. HBCK doesn't work right with hbase2, but just do this in
1618     // the meantime.
1619 this.tableStates.put(TableName.META_TABLE_NAME,
1620 new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
1624 * Check if the specified region's table is disabled.
1625 * @param tableName table to check status of
1627 boolean isTableDisabled(TableName tableName) {
1628 return tableStates.containsKey(tableName)
1629 && tableStates.get(tableName)
1630 .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1634 * Scan HDFS for all regions, recording their information into
1635 * regionInfoMap
1637 public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1638 Path rootDir = CommonFSUtils.getRootDir(getConf());
1639 FileSystem fs = rootDir.getFileSystem(getConf());
1641 // list all tables from HDFS
1642 List<FileStatus> tableDirs = Lists.newArrayList();
1644 boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1646 List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1647 for (Path path : paths) {
1648 TableName tableName = CommonFSUtils.getTableName(path);
1649 if ((!checkMetaOnly &&
1650 isTableIncluded(tableName)) ||
1651 tableName.equals(TableName.META_TABLE_NAME)) {
1652 tableDirs.add(fs.getFileStatus(path));
1656 // verify that version file exists
1657 if (!foundVersionFile) {
1658 errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1659 "Version file does not exist in root dir " + rootDir);
1660 if (shouldFixVersionFile()) {
1661 LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1662 + " file.");
1663 setShouldRerun();
1664 FSUtils.setVersion(fs, rootDir, getConf().getInt(
1665 HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1666 HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1667 HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1671 // Avoid multithreading at table-level because already multithreaded internally at
1672 // region-level. Additionally multithreading at table-level can lead to deadlock
1673 // if there are many tables in the cluster. Since there are a limited # of threads
1674 // in the executor's thread pool and if we multithread at the table-level by putting
1675 // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1676 // executor tied up solely in waiting for the tables' region-level calls to complete.
1677 // If there are enough tables then there will be no actual threads in the pool left
1678 // for the region-level callables to be serviced.
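    // Consequently each WorkItemHdfsDir below is invoked directly on the current thread via
    // call(), instead of being submitted to the shared executor.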
1679 for (FileStatus tableDir : tableDirs) {
1680       LOG.debug("Loading region dirs from " + tableDir.getPath());
1681 WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1682 try {
1683 item.call();
1684 } catch (ExecutionException e) {
1685 LOG.warn("Could not completely load table dir " +
1686 tableDir.getPath(), e.getCause());
1689 errors.print("");
1693 * Record the location of the hbase:meta region as found in ZooKeeper.
1695 private boolean recordMetaRegion() throws IOException {
1696 List<HRegionLocation> locs;
1697 try (RegionLocator locator = connection.getRegionLocator(TableName.META_TABLE_NAME)) {
1698 locs = locator.getRegionLocations(HConstants.EMPTY_START_ROW, true);
1700 if (locs == null || locs.isEmpty()) {
1701 errors.reportError(ERROR_CODE.NULL_META_REGION, "META region was not found in ZooKeeper");
1702 return false;
1704 for (HRegionLocation metaLocation : locs) {
1705 // Check if Meta region is valid and existing
1706 if (metaLocation == null) {
1707 errors.reportError(ERROR_CODE.NULL_META_REGION, "META region location is null");
1708 return false;
1710 if (metaLocation.getRegion() == null) {
1711 errors.reportError(ERROR_CODE.NULL_META_REGION, "META location regionInfo is null");
1712 return false;
1714 if (metaLocation.getHostname() == null) {
1715 errors.reportError(ERROR_CODE.NULL_META_REGION, "META location hostName is null");
1716 return false;
1718 ServerName sn = metaLocation.getServerName();
1719 HbckRegionInfo.MetaEntry m = new HbckRegionInfo.MetaEntry(metaLocation.getRegion(), sn,
1720 EnvironmentEdgeManager.currentTime());
1721 HbckRegionInfo hbckRegionInfo = regionInfoMap.get(metaLocation.getRegion().getEncodedName());
1722 if (hbckRegionInfo == null) {
1723 regionInfoMap.put(metaLocation.getRegion().getEncodedName(), new HbckRegionInfo(m));
1724 } else {
1725 hbckRegionInfo.setMetaEntry(m);
1728 return true;
1731 private ZKWatcher createZooKeeperWatcher() throws IOException {
1732 return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
1733 @Override
1734 public void abort(String why, Throwable e) {
1735 LOG.error(why, e);
1736 System.exit(1);
1739 @Override
1740 public boolean isAborted() {
1741 return false;
1748 * Contacts each regionserver and fetches metadata about regions.
1749 * @param regionServerList - the list of region servers to connect to
1750 * @throws IOException if a remote or network exception occurs
1752 void processRegionServers(Collection<ServerName> regionServerList)
1753 throws IOException, InterruptedException {
1755 List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
1756 List<Future<Void>> workFutures;
1758 // loop to contact each region server in parallel
1759 for (ServerName rsinfo: regionServerList) {
1760 workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1763 workFutures = executor.invokeAll(workItems);
1765     for (int i = 0; i < workFutures.size(); i++) {
1766 WorkItemRegion item = workItems.get(i);
1767 Future<Void> f = workFutures.get(i);
1768 try {
1769 f.get();
1770 } catch(ExecutionException e) {
1771 LOG.warn("Could not process regionserver {}", item.rsinfo.getAddress(),
1772 e.getCause());
1778 * Check consistency of all regions that have been found in previous phases.
1780 private void checkAndFixConsistency()
1781 throws IOException, KeeperException, InterruptedException {
1782 // Divide the checks in two phases. One for default/primary replicas and another
1783 // for the non-primary ones. Keeps code cleaner this way.
1785 List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
1786 for (java.util.Map.Entry<String, HbckRegionInfo> e: regionInfoMap.entrySet()) {
1787 if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1788 workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1791 checkRegionConsistencyConcurrently(workItems);
1793 boolean prevHdfsCheck = shouldCheckHdfs();
1794 setCheckHdfs(false); //replicas don't have any hdfs data
1795 // Run a pass over the replicas and fix any assignment issues that exist on the currently
1796 // deployed/undeployed replicas.
1797 List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
1798 for (java.util.Map.Entry<String, HbckRegionInfo> e: regionInfoMap.entrySet()) {
1799 if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
1800 replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1803 checkRegionConsistencyConcurrently(replicaWorkItems);
1804 setCheckHdfs(prevHdfsCheck);
1806     // If some regions were skipped during the checkRegionConsistencyConcurrently() phase, we might
1807     // not get an accurate state of HBase if we continue. The config here allows users to tune
1808     // the tolerated number of skipped regions.
1809     // TODO: evaluate the consequences of continuing the hbck operation without this config.
1810 int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1811 int numOfSkippedRegions = skippedRegions.size();
1812 if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1813 throw new IOException(numOfSkippedRegions
1814 + " region(s) could not be checked or repaired. See logs for detail.");
1817 if (shouldCheckHdfs()) {
1818 checkAndFixTableStates();
1823 * Check consistency of all regions using multiple threads concurrently.
1825 private void checkRegionConsistencyConcurrently(
1826 final List<CheckRegionConsistencyWorkItem> workItems)
1827 throws IOException, KeeperException, InterruptedException {
1828 if (workItems.isEmpty()) {
1829 return; // nothing to check
1832 List<Future<Void>> workFutures = executor.invokeAll(workItems);
1833 for(Future<Void> f: workFutures) {
1834 try {
1835 f.get();
1836 } catch(ExecutionException e1) {
1837         LOG.warn("Could not check region consistency", e1.getCause());
1838 if (e1.getCause() instanceof IOException) {
1839 throw (IOException)e1.getCause();
1840 } else if (e1.getCause() instanceof KeeperException) {
1841 throw (KeeperException)e1.getCause();
1842 } else if (e1.getCause() instanceof InterruptedException) {
1843 throw (InterruptedException)e1.getCause();
1844 } else {
1845 throw new IOException(e1.getCause());
1851 class CheckRegionConsistencyWorkItem implements Callable<Void> {
1852 private final String key;
1853 private final HbckRegionInfo hbi;
1855 CheckRegionConsistencyWorkItem(String key, HbckRegionInfo hbi) {
1856 this.key = key;
1857 this.hbi = hbi;
1860 @Override
1861 public synchronized Void call() throws Exception {
1862 try {
1863 checkRegionConsistency(key, hbi);
1864 } catch (Exception e) {
1865         // If the region is a non-META region, skip this region and send a warning/error message; if
1866         // the region is the META region, we should not continue.
1867 LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
1868 + "'.", e);
1869 if (hbi.getHdfsHRI().isMetaRegion()) {
1870 throw e;
1872 LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1873 addSkippedRegion(hbi);
1875 return null;
1879 private void addSkippedRegion(final HbckRegionInfo hbi) {
1880 Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1881 if (skippedRegionNames == null) {
1882 skippedRegionNames = new HashSet<>();
1884 skippedRegionNames.add(hbi.getRegionNameAsString());
1885 skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1889 * Check and fix table states, assumes full info available:
1890 * - tableInfos
1891 * - empty tables loaded
1893 private void checkAndFixTableStates() throws IOException {
1894 // first check dangling states
1895 for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
1896 TableName tableName = entry.getKey();
1897 TableState tableState = entry.getValue();
1898 HbckTableInfo tableInfo = tablesInfo.get(tableName);
1899 if (isTableIncluded(tableName)
1900 && !tableName.isSystemTable()
1901 && tableInfo == null) {
1902 if (fixMeta) {
1903 MetaTableAccessor.deleteTableState(connection, tableName);
1904 TableState state = MetaTableAccessor.getTableState(connection, tableName);
1905 if (state != null) {
1906 errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1907 tableName + " unable to delete dangling table state " + tableState);
1909 } else if (!checkMetaOnly) {
1910 // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
1911         // true, tableInfo will be null as tablesInfo is not populated for all tables from hdfs
1912 errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
1913 tableName + " has dangling table state " + tableState);
1917 // check that all tables have states
1918 for (TableName tableName : tablesInfo.keySet()) {
1919 if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
1920 if (fixMeta) {
1921 MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
1922 TableState newState = MetaTableAccessor.getTableState(connection, tableName);
1923 if (newState == null) {
1924 errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1925 "Unable to change state for table " + tableName + " in meta ");
1927 } else {
1928 errors.reportError(ERROR_CODE.NO_TABLE_STATE,
1929 tableName + " has no state in meta ");
1935 private void preCheckPermission() throws IOException {
1936 if (shouldIgnorePreCheckPermission()) {
1937 return;
1940 Path hbaseDir = CommonFSUtils.getRootDir(getConf());
1941 FileSystem fs = hbaseDir.getFileSystem(getConf());
1942 UserProvider userProvider = UserProvider.instantiate(getConf());
1943 UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1944 FileStatus[] files = fs.listStatus(hbaseDir);
1945 for (FileStatus file : files) {
1946 try {
1947 fs.access(file.getPath(), FsAction.WRITE);
1948 } catch (AccessControlException ace) {
1949 LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
1950 errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1951 + " does not have write perms to " + file.getPath()
1952 + ". Please rerun hbck as hdfs user " + file.getOwner());
1953 throw ace;
1959 * Deletes region from meta table
1961 private void deleteMetaRegion(HbckRegionInfo hi) throws IOException {
1962 deleteMetaRegion(hi.getMetaEntry().getRegionInfo().getRegionName());
1966 * Deletes region from meta table
1968 private void deleteMetaRegion(byte[] metaKey) throws IOException {
1969 Delete d = new Delete(metaKey);
1970 meta.delete(d);
1971 LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1975 * Reset the split parent region info in meta table
1977 private void resetSplitParent(HbckRegionInfo hi) throws IOException {
1978 RowMutations mutations = new RowMutations(hi.getMetaEntry().getRegionInfo().getRegionName());
1979 Delete d = new Delete(hi.getMetaEntry().getRegionInfo().getRegionName());
1980 d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1981 d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1982 mutations.add(d);
1984 RegionInfo hri = RegionInfoBuilder.newBuilder(hi.getMetaEntry().getRegionInfo())
1985 .setOffline(false).setSplit(false).build();
1986 Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
1987 mutations.add(p);
1989 meta.mutateRow(mutations);
1990 LOG.info("Reset split parent " + hi.getMetaEntry().getRegionInfo().getRegionNameAsString() +
1991 " in META");
1995    * This is a backwards-compatibility wrapper for permanently offlining a region
1996 * that should not be alive. If the region server does not support the
1997 * "offline" method, it will use the closest unassign method instead. This
1998 * will basically work until one attempts to disable or delete the affected
1999 * table. The problem has to do with in-memory only master state, so
2000 * restarting the HMaster or failing over to another should fix this.
2002 void offline(byte[] regionName) throws IOException {
2003 String regionString = Bytes.toStringBinary(regionName);
2004 if (!rsSupportsOffline) {
2005 LOG.warn(
2006 "Using unassign region " + regionString + " instead of using offline method, you should" +
2007 " restart HMaster after these repairs");
2008 admin.unassign(regionName, true);
2009 return;
2012 // first time we assume the rs's supports #offline.
2013 try {
2014 LOG.info("Offlining region " + regionString);
2015 admin.offline(regionName);
2016 } catch (IOException ioe) {
2017 String notFoundMsg = "java.lang.NoSuchMethodException: " +
2018 "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2019 if (ioe.getMessage().contains(notFoundMsg)) {
2020 LOG.warn("Using unassign region " + regionString +
2021 " instead of using offline method, you should" +
2022 " restart HMaster after these repairs");
2023 rsSupportsOffline = false; // in the future just use unassign
2024 admin.unassign(regionName, true);
2025 return;
2027 throw ioe;
2032    * Attempts to undeploy a region from a region server based on information in
2033 * META. Any operations that modify the file system should make sure that
2034 * its corresponding region is not deployed to prevent data races.
2036 * A separate call is required to update the master in-memory region state
2037    * kept in the AssignmentManager. Because disable uses this state instead of
2038 * that found in META, we can't seem to cleanly disable/delete tables that
2039 * have been hbck fixed. When used on a version of HBase that does not have
2040 * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
2041 * restart or failover may be required.
2043 void closeRegion(HbckRegionInfo hi) throws IOException, InterruptedException {
2044 if (hi.getMetaEntry() == null && hi.getHdfsEntry() == null) {
2045 undeployRegions(hi);
2046 return;
2049 // get assignment info and hregioninfo from meta.
2050 Get get = new Get(hi.getRegionName());
2051 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2052 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2053 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2054 // also get the locations of the replicas to close if the primary region is being closed
2055 if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2056 int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2057 for (int i = 0; i < numReplicas; i++) {
2058 get.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(i));
2059 get.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getStartCodeColumn(i));
2062 Result r = meta.get(get);
2063 RegionLocations rl = CatalogFamilyFormat.getRegionLocations(r);
2064 if (rl == null) {
2065 LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2066 " since meta does not have handle to reach it");
2067 return;
2069 for (HRegionLocation h : rl.getRegionLocations()) {
2070 ServerName serverName = h.getServerName();
2071 if (serverName == null) {
2072 errors.reportError("Unable to close region "
2073 + hi.getRegionNameAsString() + " because meta does not "
2074 + "have handle to reach it.");
2075 continue;
2077 RegionInfo hri = h.getRegion();
2078 if (hri == null) {
2079 LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2080 + " because hbase:meta had invalid or missing "
2081 + HConstants.CATALOG_FAMILY_STR + ":"
2082 + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2083 + " qualifier value.");
2084 continue;
2086 // close the region -- close files and remove assignment
2087 HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2091 private void undeployRegions(HbckRegionInfo hi) throws IOException, InterruptedException {
2092 undeployRegionsForHbi(hi);
2093 // undeploy replicas of the region (but only if the method is invoked for the primary)
2094 if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2095 return;
2097 int numReplicas = admin.getDescriptor(hi.getTableName()).getRegionReplication();
2098 for (int i = 1; i < numReplicas; i++) {
2099 if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2100 RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2101 hi.getPrimaryHRIForDeployedReplica(), i);
2102 HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2103 if (h != null) {
2104 undeployRegionsForHbi(h);
2105 //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2106 //in consistency checks
2107 h.setSkipChecks(true);
2112 private void undeployRegionsForHbi(HbckRegionInfo hi) throws IOException, InterruptedException {
2113 for (HbckRegionInfo.OnlineEntry rse : hi.getOnlineEntries()) {
2114 LOG.debug("Undeploy region " + rse.getRegionInfo() + " from " + rse.getServerName());
2115 try {
2116 HBaseFsckRepair
2117 .closeRegionSilentlyAndWait(connection, rse.getServerName(), rse.getRegionInfo());
2118 offline(rse.getRegionInfo().getRegionName());
2119 } catch (IOException ioe) {
2120 LOG.warn("Got exception when attempting to offline region "
2121 + Bytes.toString(rse.getRegionInfo().getRegionName()), ioe);
2126 private void tryAssignmentRepair(HbckRegionInfo hbi, String msg) throws IOException,
2127 KeeperException, InterruptedException {
2128 // If we are trying to fix the errors
2129 if (shouldFixAssignments()) {
2130 errors.print(msg);
2131 undeployRegions(hbi);
2132 setShouldRerun();
2133 RegionInfo hri = hbi.getHdfsHRI();
2134 if (hri == null) {
2135 hri = hbi.getMetaEntry().getRegionInfo();
2137 HBaseFsckRepair.fixUnassigned(admin, hri);
2138 HBaseFsckRepair.waitUntilAssigned(admin, hri);
2140 // also assign replicas if needed (do it only when this call operates on a primary replica)
2141 if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
2142 int replicationCount = admin.getDescriptor(hri.getTable()).getRegionReplication();
2143 for (int i = 1; i < replicationCount; i++) {
2144 hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2145 HbckRegionInfo h = regionInfoMap.get(hri.getEncodedName());
2146 if (h != null) {
2147 undeployRegions(h);
2148 //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2149 //in consistency checks
2150 h.setSkipChecks(true);
2152 HBaseFsckRepair.fixUnassigned(admin, hri);
2153 HBaseFsckRepair.waitUntilAssigned(admin, hri);
2160 * Check a single region for consistency and correct deployment.
2162 private void checkRegionConsistency(final String key, final HbckRegionInfo hbi)
2163 throws IOException, KeeperException, InterruptedException {
2165 if (hbi.isSkipChecks()) return;
2166 String descriptiveName = hbi.toString();
2167 boolean inMeta = hbi.getMetaEntry() != null;
2168 // In case not checking HDFS, assume the region is on HDFS
2169 boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2170 boolean hasMetaAssignment = inMeta && hbi.getMetaEntry().regionServer != null;
2171 boolean isDeployed = !hbi.getDeployedOn().isEmpty();
2172 boolean isMultiplyDeployed = hbi.getDeployedOn().size() > 1;
2173 boolean deploymentMatchesMeta = hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2174 hbi.getMetaEntry().regionServer.equals(hbi.getDeployedOn().get(0));
2175 boolean splitParent = inMeta && hbi.getMetaEntry().getRegionInfo().isSplit() &&
2176 hbi.getMetaEntry().getRegionInfo().isOffline();
2177 boolean shouldBeDeployed =
2178 inMeta && !isTableDisabled(hbi.getMetaEntry().getRegionInfo().getTable());
2179 boolean recentlyModified = inHdfs &&
2180 hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
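    // To summarize the branches below: the first few handle the healthy cases (fully consistent
    // and deployed, consistently undeployed for a disabled table, or recently modified and thus
    // skipped); each remaining branch handles one inconsistent combination of the flags computed
    // above and, where a fix option is enabled, attempts the corresponding repair.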
2182 // ========== First the healthy cases =============
2183 if (hbi.containsOnlyHdfsEdits()) {
2184 return;
2186 if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2187 return;
2188 } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2189 LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2190 "tabled that is not deployed");
2191 return;
2192 } else if (recentlyModified) {
2193 LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2194 return;
2196 // ========== Cases where the region is not in hbase:meta =============
2197 else if (!inMeta && !inHdfs && !isDeployed) {
2198 // We shouldn't have record of this region at all then!
2199 assert false : "Entry for region with no data";
2200 } else if (!inMeta && !inHdfs && isDeployed) {
2201 errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2202 + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2203 "deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2204 if (shouldFixAssignments()) {
2205 undeployRegions(hbi);
2208 } else if (!inMeta && inHdfs && !isDeployed) {
2209 if (hbi.isMerged()) {
2210 // This region has already been merged, the remaining hdfs file will be
2211 // cleaned by CatalogJanitor later
2212 hbi.setSkipChecks(true);
2213 LOG.info("Region " + descriptiveName
2214 + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2215 return;
2217 errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2218 + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2219 "or deployed on any region server");
2220 // restore region consistency of an adopted orphan
2221 if (shouldFixMeta()) {
2222 if (!hbi.isHdfsRegioninfoPresent()) {
2223 LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2224 + " in table integrity repair phase if -fixHdfsOrphans was" +
2225 " used.");
2226 return;
2229 RegionInfo hri = hbi.getHdfsHRI();
2230 HbckTableInfo tableInfo = tablesInfo.get(hri.getTable());
2232 for (RegionInfo region : tableInfo.getRegionsFromMeta(this.regionInfoMap)) {
2233 if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2234 && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2235 hri.getEndKey()) >= 0)
2236 && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2237           if (region.isSplit() || region.isOffline()) continue;
2238 Path regionDir = hbi.getHdfsRegionDir();
2239 FileSystem fs = regionDir.getFileSystem(getConf());
2240 List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2241 for (Path familyDir : familyDirs) {
2242 List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2243 for (Path referenceFilePath : referenceFilePaths) {
2244 Path parentRegionDir =
2245 StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2246 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2247 LOG.warn(hri + " start and stop keys are in the range of " + region
2248 + ". The region might not be cleaned up from hdfs when region " + region
2249 + " split failed. Hence deleting from hdfs.");
2250 HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2251 regionDir.getParent(), hri);
2252 return;
2258 LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2259 int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2260 HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2261 admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2262 .getLiveServerMetrics().keySet(), numReplicas);
2264 tryAssignmentRepair(hbi, "Trying to reassign region...");
2267 } else if (!inMeta && inHdfs && isDeployed) {
2268 errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2269 + " not in META, but deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2270 debugLsr(hbi.getHdfsRegionDir());
2271 if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2272 // for replicas, this means that we should undeploy the region (we would have
2273 // gone over the primaries and fixed meta holes in first phase under
2274 // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2275 // this stage unless unwanted replica)
2276 if (shouldFixAssignments()) {
2277 undeployRegionsForHbi(hbi);
2280 if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2281 if (!hbi.isHdfsRegioninfoPresent()) {
2282 LOG.error("This should have been repaired in table integrity repair phase");
2283 return;
2286         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2287 int numReplicas = admin.getDescriptor(hbi.getTableName()).getRegionReplication();
2288 HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2289 admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2290 .getLiveServerMetrics().keySet(), numReplicas);
2291 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2294 // ========== Cases where the region is in hbase:meta =============
2295 } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2296 // check whether this is an actual error, or just transient state where parent
2297 // is not cleaned
2298 if (hbi.getMetaEntry().splitA != null && hbi.getMetaEntry().splitB != null) {
2299 // check that split daughters are there
2300 HbckRegionInfo infoA = this.regionInfoMap.get(hbi.getMetaEntry().splitA.getEncodedName());
2301 HbckRegionInfo infoB = this.regionInfoMap.get(hbi.getMetaEntry().splitB.getEncodedName());
2302 if (infoA != null && infoB != null) {
2303 // we already processed or will process daughters. Move on, nothing to see here.
2304 hbi.setSkipChecks(true);
2305 return;
2309 // For Replica region, we need to do a similar check. If replica is not split successfully,
2310 // error is going to be reported against primary daughter region.
2311 if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2312 LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2313 + "and not deployed on any region server. This may be transient.");
2314 hbi.setSkipChecks(true);
2315 return;
2318 errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2319 + descriptiveName + " is a split parent in META, in HDFS, "
2320 + "and not deployed on any region server. This could be transient, "
2321 + "consider to run the catalog janitor first!");
2322 if (shouldFixSplitParents()) {
2323 setShouldRerun();
2324 resetSplitParent(hbi);
2326 } else if (inMeta && !inHdfs && !isDeployed) {
2327 errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2328 + descriptiveName + " found in META, but not in HDFS "
2329 + "or deployed on any region server.");
2330 if (shouldFixMeta()) {
2331 deleteMetaRegion(hbi);
2333 } else if (inMeta && !inHdfs && isDeployed) {
2334 errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2335 + " found in META, but not in HDFS, " +
2336 "and deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2337 // We treat HDFS as ground truth. Any information in meta is transient
2338       // and equivalent data can be regenerated. So, let's unassign and remove
2339 // these problems from META.
2340 if (shouldFixAssignments()) {
2341 errors.print("Trying to fix unassigned region...");
2342 undeployRegions(hbi);
2344 if (shouldFixMeta()) {
2345 // wait for it to complete
2346 deleteMetaRegion(hbi);
2348 } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2349 errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2350 + " not deployed on any region server.");
2351 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2352 } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2353 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2354 "Region " + descriptiveName + " should not be deployed according " +
2355 "to META, but is deployed on " + Joiner.on(", ").join(hbi.getDeployedOn()));
2356 if (shouldFixAssignments()) {
2357 errors.print("Trying to close the region " + descriptiveName);
2358 setShouldRerun();
2359 HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
2360 hbi.getDeployedOn());
2362 } else if (inMeta && inHdfs && isMultiplyDeployed) {
2363 errors.reportError(ERROR_CODE.MULTI_DEPLOYED,
2364 "Region " + descriptiveName + " is listed in hbase:meta on region server " +
2365 hbi.getMetaEntry().regionServer + " but is multiply assigned to region servers " +
2366 Joiner.on(", ").join(hbi.getDeployedOn()));
2367 // If we are trying to fix the errors
2368 if (shouldFixAssignments()) {
2369 errors.print("Trying to fix assignment error...");
2370 setShouldRerun();
2371 HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
2372 hbi.getDeployedOn());
2374 } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2375 errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2376 + descriptiveName + " listed in hbase:meta on region server " +
2377 hbi.getMetaEntry().regionServer + " but found on region server " +
2378 hbi.getDeployedOn().get(0));
2379 // If we are trying to fix the errors
2380 if (shouldFixAssignments()) {
2381 errors.print("Trying to fix assignment error...");
2382 setShouldRerun();
2383 HBaseFsckRepair.fixMultiAssignment(connection, hbi.getMetaEntry().getRegionInfo(),
2384 hbi.getDeployedOn());
2385 HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2387 } else {
2388 errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2389 " is in an unforeseen state:" +
2390 " inMeta=" + inMeta +
2391 " inHdfs=" + inHdfs +
2392 " isDeployed=" + isDeployed +
2393 " isMultiplyDeployed=" + isMultiplyDeployed +
2394 " deploymentMatchesMeta=" + deploymentMatchesMeta +
2395 " shouldBeDeployed=" + shouldBeDeployed);
2400 * Checks tables integrity. Goes over all regions and scans the tables.
2401 * Collects all the pieces for each table and checks if there are missing,
2402 * repeated or overlapping ones.
2403 * @throws IOException
2405 SortedMap<TableName, HbckTableInfo> checkIntegrity() throws IOException {
2406 tablesInfo = new TreeMap<>();
2407 LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2408 for (HbckRegionInfo hbi : regionInfoMap.values()) {
2409 // Check only valid, working regions
2410 if (hbi.getMetaEntry() == null) {
2411 // this assumes that consistency check has run loadMetaEntry
2412 Path p = hbi.getHdfsRegionDir();
2413 if (p == null) {
2414 errors.report("No regioninfo in Meta or HDFS. " + hbi);
2417 // TODO test.
2418 continue;
2420 if (hbi.getMetaEntry().regionServer == null) {
2421 errors.detail("Skipping region because no region server: " + hbi);
2422 continue;
2424 if (hbi.getMetaEntry().getRegionInfo().isOffline()) {
2425 errors.detail("Skipping region because it is offline: " + hbi);
2426 continue;
2428 if (hbi.containsOnlyHdfsEdits()) {
2429         errors.detail("Skipping region because it only contains edits: " + hbi);
2430 continue;
2433 // Missing regionDir or over-deployment is checked elsewhere. Include
2434 // these cases in modTInfo, so we can evaluate those regions as part of
2435 // the region chain in META
2436 //if (hbi.foundRegionDir == null) continue;
2437 //if (hbi.deployedOn.size() != 1) continue;
2438 if (hbi.getDeployedOn().isEmpty()) {
2439 continue;
2442 // We should be safe here
2443 TableName tableName = hbi.getMetaEntry().getRegionInfo().getTable();
2444 HbckTableInfo modTInfo = tablesInfo.get(tableName);
2445 if (modTInfo == null) {
2446 modTInfo = new HbckTableInfo(tableName, this);
2448 for (ServerName server : hbi.getDeployedOn()) {
2449 modTInfo.addServer(server);
2452 if (!hbi.isSkipChecks()) {
2453 modTInfo.addRegionInfo(hbi);
2456 tablesInfo.put(tableName, modTInfo);
2459 loadTableInfosForTablesWithNoRegion();
2461 logParallelMerge();
2462 for (HbckTableInfo tInfo : tablesInfo.values()) {
2463 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2464 if (!tInfo.checkRegionChain(handler)) {
2465 errors.report("Found inconsistency in table " + tInfo.getName());
2468 return tablesInfo;
2471   /** Loads table infos for tables that may not have been included, since there are no
2472    * regions reported for the table, but the table dir is present in HDFS.
2474 private void loadTableInfosForTablesWithNoRegion() throws IOException {
2475 Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2476 for (TableDescriptor htd : allTables.values()) {
2477 if (checkMetaOnly && !htd.isMetaTable()) {
2478 continue;
2481 TableName tableName = htd.getTableName();
2482 if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2483 HbckTableInfo tableInfo = new HbckTableInfo(tableName, this);
2484 tableInfo.htds.add(htd);
2485 tablesInfo.put(htd.getTableName(), tableInfo);
2491 * Merge hdfs data by moving from contained HbckRegionInfo into targetRegionDir.
2492 * @return number of file move fixes done to merge regions.
2494 public int mergeRegionDirs(Path targetRegionDir, HbckRegionInfo contained) throws IOException {
2495 int fileMoves = 0;
2496 String thread = Thread.currentThread().getName();
2497 LOG.debug("[" + thread + "] Contained region dir after close and pause");
2498 debugLsr(contained.getHdfsRegionDir());
2500 // rename the contained into the container.
2501 FileSystem fs = targetRegionDir.getFileSystem(getConf());
2502 FileStatus[] dirs = null;
2503 try {
2504 dirs = fs.listStatus(contained.getHdfsRegionDir());
2505 } catch (FileNotFoundException fnfe) {
2506 // region we are attempting to merge in is not present! Since this is a merge, there is
2507 // no harm skipping this region if it does not exist.
2508 if (!fs.exists(contained.getHdfsRegionDir())) {
2509 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2510 + " is missing. Assuming already sidelined or moved.");
2511 } else {
2512 sidelineRegionDir(fs, contained);
2514 return fileMoves;
2517 if (dirs == null) {
2518 if (!fs.exists(contained.getHdfsRegionDir())) {
2519 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2520 + " already sidelined.");
2521 } else {
2522 sidelineRegionDir(fs, contained);
2524 return fileMoves;
2527 for (FileStatus cf : dirs) {
2528 Path src = cf.getPath();
2529 Path dst = new Path(targetRegionDir, src.getName());
2531 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2532 // do not copy the old .regioninfo file.
2533 continue;
2536 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2537 // do not copy the .oldlogs files
2538 continue;
2541 LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2542 // FileSystem.rename is inconsistent with directories -- if the
2543 // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2544 // it moves the src into the dst dir resulting in (foo/a/b). If
2545       // the dst does not exist, and the src is a dir, src becomes dst. (foo/b)
2546 for (FileStatus hfile : fs.listStatus(src)) {
2547 boolean success = fs.rename(hfile.getPath(), dst);
2548 if (success) {
2549 fileMoves++;
2552 LOG.debug("[" + thread + "] Sideline directory contents:");
2553 debugLsr(targetRegionDir);
2556 // if all success.
2557 sidelineRegionDir(fs, contained);
2558 LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2559 getSidelineDir());
2560 debugLsr(contained.getHdfsRegionDir());
2562 return fileMoves;
2566 static class WorkItemOverlapMerge implements Callable<Void> {
2567 private TableIntegrityErrorHandler handler;
2568 Collection<HbckRegionInfo> overlapgroup;
2570 WorkItemOverlapMerge(Collection<HbckRegionInfo> overlapgroup,
2571 TableIntegrityErrorHandler handler) {
2572 this.handler = handler;
2573 this.overlapgroup = overlapgroup;
2576 @Override
2577 public Void call() throws Exception {
2578 handler.handleOverlapGroup(overlapgroup);
2579 return null;
2584    * Return a list of user-space table names whose metadata has not been
2585    * modified in the last few milliseconds specified by timelag:
2586    * if none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
2587    * SPLITA_QUALIFIER or SPLITB_QUALIFIER columns have changed in the last
2588    * timelag milliseconds, then the table is a candidate to be returned.
2589 * @return tables that have not been modified recently
2590 * @throws IOException if an error is encountered
2592 TableDescriptor[] getTables(AtomicInteger numSkipped) {
2593 List<TableName> tableNames = new ArrayList<>();
2594 long now = EnvironmentEdgeManager.currentTime();
2596 for (HbckRegionInfo hbi : regionInfoMap.values()) {
2597 HbckRegionInfo.MetaEntry info = hbi.getMetaEntry();
2599       // if the start key is empty, then we have found the first region of a table.
2600 // pick only those tables that were not modified in the last few milliseconds.
2601 if (info != null && info.getRegionInfo().getStartKey().length == 0 &&
2602 !info.getRegionInfo().isMetaRegion()) {
2603 if (info.modTime + timelag < now) {
2604 tableNames.add(info.getRegionInfo().getTable());
2605 } else {
2606 numSkipped.incrementAndGet(); // one more in-flux table
2610 return getTableDescriptors(tableNames);
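  // For instance, with timelag = 60000 a table whose first region's hbase:meta entry was updated
  // within the last minute is treated as in-flux: it is left out of the returned descriptors and
  // numSkipped is incremented instead.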
2613 TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
2614 LOG.info("getTableDescriptors == tableNames => " + tableNames);
2615 try (Connection conn = ConnectionFactory.createConnection(getConf());
2616 Admin admin = conn.getAdmin()) {
2617 List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
2618 return tds.toArray(new TableDescriptor[tds.size()]);
2619 } catch (IOException e) {
2620 LOG.debug("Exception getting table descriptors", e);
2622 return new TableDescriptor[0];
2626    * Gets the entry in regionInfo corresponding to the given encoded
2627 * region name. If the region has not been seen yet, a new entry is added
2628 * and returned.
2630 private synchronized HbckRegionInfo getOrCreateInfo(String name) {
2631 HbckRegionInfo hbi = regionInfoMap.get(name);
2632 if (hbi == null) {
2633 hbi = new HbckRegionInfo(null);
2634 regionInfoMap.put(name, hbi);
2636 return hbi;
2639 private void checkAndFixReplication() throws ReplicationException {
2640 ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, errors);
2641 checker.checkUnDeletedQueues();
2643 if (checker.hasUnDeletedQueues() && this.fixReplication) {
2644 checker.fixUnDeletedQueues();
2645 setShouldRerun();
2650    * Check values in regionInfo for hbase:meta.
2651    * Check whether zero or more than one region claiming to hold hbase:meta is found.
2652    * If there are inconsistencies (i.e. zero or more than one region
2653    * pretends to be holding hbase:meta) try to fix that and report an error.
2654 * @throws IOException from HBaseFsckRepair functions
2655 * @throws KeeperException
2656 * @throws InterruptedException
2658 boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
2659 Map<Integer, HbckRegionInfo> metaRegions = new HashMap<>();
2660 for (HbckRegionInfo value : regionInfoMap.values()) {
2661 if (value.getMetaEntry() != null && value.getMetaEntry().getRegionInfo().isMetaRegion()) {
2662 metaRegions.put(value.getReplicaId(), value);
2665 int metaReplication = admin.getDescriptor(TableName.META_TABLE_NAME)
2666 .getRegionReplication();
2667 boolean noProblem = true;
2668     // There will always be entries in regionInfoMap corresponding to hbase:meta & its replicas
2669 // Check the deployed servers. It should be exactly one server for each replica.
2670 for (int i = 0; i < metaReplication; i++) {
2671 HbckRegionInfo metaHbckRegionInfo = metaRegions.remove(i);
2672 List<ServerName> servers = new ArrayList<>();
2673 if (metaHbckRegionInfo != null) {
2674 servers = metaHbckRegionInfo.getDeployedOn();
2676 if (servers.size() != 1) {
2677 noProblem = false;
2678 if (servers.isEmpty()) {
2679 assignMetaReplica(i);
2680 } else if (servers.size() > 1) {
2681 errors
2682 .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
2683             metaHbckRegionInfo.getReplicaId() + " is found on more than one region server.");
2684 if (shouldFixAssignments()) {
2685 errors.print("Trying to fix a problem with hbase:meta, replicaId " +
2686 metaHbckRegionInfo.getReplicaId() + "..");
2687 setShouldRerun();
2688             // try to fix it (treat it as a dupe assignment)
2689 HBaseFsckRepair.fixMultiAssignment(connection,
2690 metaHbckRegionInfo.getMetaEntry().getRegionInfo(), servers);
2695 // unassign whatever is remaining in metaRegions. They are excess replicas.
2696 for (Map.Entry<Integer, HbckRegionInfo> entry : metaRegions.entrySet()) {
2697 noProblem = false;
2698 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2699 "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
2700 ", deployed " + metaRegions.size());
2701 if (shouldFixAssignments()) {
2702 errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
2703 " of hbase:meta..");
2704 setShouldRerun();
2705 unassignMetaReplica(entry.getValue());
2708 // if noProblem is false, rerun hbck with hopefully fixed META
2709 // if noProblem is true, no errors, so continue normally
2710 return noProblem;
2713 private void unassignMetaReplica(HbckRegionInfo hi)
2714 throws IOException, InterruptedException, KeeperException {
2715 undeployRegions(hi);
2716 ZKUtil.deleteNode(zkw,
2717 zkw.getZNodePaths().getZNodeForReplica(hi.getMetaEntry().getRegionInfo().getReplicaId()));
2720 private void assignMetaReplica(int replicaId)
2721 throws IOException, KeeperException, InterruptedException {
2722 errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
2723         replicaId + " is not found deployed on any region server.");
2724 if (shouldFixAssignments()) {
2725 errors.print("Trying to fix a problem with hbase:meta..");
2726 setShouldRerun();
2727 // try to fix it (treat it as unassigned region)
2728 RegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
2729 RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
2730 HBaseFsckRepair.fixUnassigned(admin, h);
2731 HBaseFsckRepair.waitUntilAssigned(admin, h);
2736 * Scan hbase:meta, adding all regions found to the regionInfo map.
2737 * @throws IOException if an error is encountered
2739 boolean loadMetaEntries() throws IOException {
2740 ClientMetaTableAccessor.Visitor visitor = new ClientMetaTableAccessor.Visitor() {
2741 int countRecord = 1;
2743 // comparator to sort KeyValues with latest modtime
2744 final Comparator<Cell> comp = new Comparator<Cell>() {
2745 @Override
2746 public int compare(Cell k1, Cell k2) {
2747 return Long.compare(k1.getTimestamp(), k2.getTimestamp());
2751 @Override
2752 public boolean visit(Result result) throws IOException {
2753 try {
2755 // record the latest modification of this META record
2756 long ts = Collections.max(result.listCells(), comp).getTimestamp();
2757 RegionLocations rl = CatalogFamilyFormat.getRegionLocations(result);
2758 if (rl == null) {
2759 emptyRegionInfoQualifiers.add(result);
2760 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2761 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2762 return true;
2764 ServerName sn = null;
2765 if (rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null ||
2766 rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion() == null) {
2767 emptyRegionInfoQualifiers.add(result);
2768 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
2769 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
2770 return true;
2772 RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegion();
2773 if (!(isTableIncluded(hri.getTable())
2774 || hri.isMetaRegion())) {
2775 return true;
2777 PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
2778 for (HRegionLocation h : rl.getRegionLocations()) {
2779 if (h == null || h.getRegion() == null) {
2780 continue;
2782 sn = h.getServerName();
2783 hri = h.getRegion();
2785 HbckRegionInfo.MetaEntry m = null;
2786 if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2787 m = new HbckRegionInfo.MetaEntry(hri, sn, ts, daughters.getFirst(),
2788 daughters.getSecond());
2789 } else {
2790 m = new HbckRegionInfo.MetaEntry(hri, sn, ts, null, null);
2792 HbckRegionInfo previous = regionInfoMap.get(hri.getEncodedName());
2793 if (previous == null) {
2794 regionInfoMap.put(hri.getEncodedName(), new HbckRegionInfo(m));
2795 } else if (previous.getMetaEntry() == null) {
2796 previous.setMetaEntry(m);
2797 } else {
2798 throw new IOException("Two entries in hbase:meta are same " + previous);
2801 List<RegionInfo> mergeParents = CatalogFamilyFormat.getMergeRegions(result.rawCells());
2802 if (mergeParents != null) {
2803 for (RegionInfo mergeRegion : mergeParents) {
2804 if (mergeRegion != null) {
2805 // This region is already being merged
2806 HbckRegionInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
2807 hbInfo.setMerged(true);
2812 // show proof of progress to the user, once for every 100 records.
2813 if (countRecord % 100 == 0) {
2814 errors.progress();
2816 countRecord++;
2817 return true;
2818 } catch (RuntimeException e) {
2819 LOG.error("Result=" + result);
2820 throw e;
2824 if (!checkMetaOnly) {
2825 // Scan hbase:meta to pick up user regions
2826 MetaTableAccessor.fullScanRegions(connection, visitor);
2829 errors.print("");
2830 return true;
2834 * Prints summary of all tables found on the system.
2836 private void printTableSummary(SortedMap<TableName, HbckTableInfo> tablesInfo) {
2837 StringBuilder sb = new StringBuilder();
2838 int numOfSkippedRegions;
2839 errors.print("Summary:");
2840 for (HbckTableInfo tInfo : tablesInfo.values()) {
2841 numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
2842 skippedRegions.get(tInfo.getName()).size() : 0;
2844 if (errors.tableHasErrors(tInfo)) {
2845 errors.print("Table " + tInfo.getName() + " is inconsistent.");
2846 } else if (numOfSkippedRegions > 0){
2847 errors.print("Table " + tInfo.getName() + " is okay (with "
2848 + numOfSkippedRegions + " skipped regions).");
2850 else {
2851 errors.print("Table " + tInfo.getName() + " is okay.");
2853 errors.print(" Number of regions: " + tInfo.getNumRegions());
2854 if (numOfSkippedRegions > 0) {
2855 Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
2856 System.out.println(" Number of skipped regions: " + numOfSkippedRegions);
2857 System.out.println(" List of skipped regions:");
2858 for(String sr : skippedRegionStrings) {
2859 System.out.println(" " + sr);
2862 sb.setLength(0); // clear out existing buffer, if any.
2863 sb.append(" Deployed on: ");
2864 for (ServerName server : tInfo.deployedOn) {
2865 sb.append(" " + server.toString());
2867 errors.print(sb.toString());
2871 static HbckErrorReporter getErrorReporter(final Configuration conf)
2872 throws ClassNotFoundException {
2873 Class<? extends HbckErrorReporter> reporter =
2874 conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class,
2875 HbckErrorReporter.class);
2876 return ReflectionUtils.newInstance(reporter, conf);
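  // A custom reporter can be plugged in via configuration, e.g.
  //   conf.setClass("hbasefsck.errorreporter", MyHbckReporter.class, HbckErrorReporter.class);
  // where MyHbckReporter is a hypothetical implementation of HbckErrorReporter.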
2879 static class PrintingErrorReporter implements HbckErrorReporter {
2880 public int errorCount = 0;
2881 private int showProgress;
2882 // How frequently calls to progress() will create output
2883 private static final int progressThreshold = 100;
2885 Set<HbckTableInfo> errorTables = new HashSet<>();
2887 // for use by unit tests to verify which errors were discovered
2888 private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
2890 @Override
2891 public void clear() {
2892 errorTables.clear();
2893 errorList.clear();
2894 errorCount = 0;
2897 @Override
2898 public synchronized void reportError(ERROR_CODE errorCode, String message) {
2899 if (errorCode == ERROR_CODE.WRONG_USAGE) {
2900 System.err.println(message);
2901 return;
2904 errorList.add(errorCode);
2905 if (!summary) {
2906 System.out.println("ERROR: " + message);
2908 errorCount++;
2909 showProgress = 0;
2912 @Override
2913 public synchronized void reportError(ERROR_CODE errorCode, String message,
2914 HbckTableInfo table) {
2915 errorTables.add(table);
2916 reportError(errorCode, message);
2919 @Override
2920 public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2921 HbckRegionInfo info) {
2922 errorTables.add(table);
2923 String reference = "(region " + info.getRegionNameAsString() + ")";
2924 reportError(errorCode, reference + " " + message);
2927 @Override
2928 public synchronized void reportError(ERROR_CODE errorCode, String message, HbckTableInfo table,
2929 HbckRegionInfo info1, HbckRegionInfo info2) {
2930 errorTables.add(table);
2931 String reference = "(regions " + info1.getRegionNameAsString()
2932 + " and " + info2.getRegionNameAsString() + ")";
2933 reportError(errorCode, reference + " " + message);
2936 @Override
2937 public synchronized void reportError(String message) {
2938 reportError(ERROR_CODE.UNKNOWN, message);
2942 * Report error information, but do not increment the error count. Intended for cases
2943 * where the actual error would have been reported previously.
2944 * @param message the message to report
2946 @Override
2947 public synchronized void report(String message) {
2948 if (!summary) {
2949 System.out.println("ERROR: " + message);
2951 showProgress = 0;
2954 @Override
2955 public synchronized int summarize() {
2956 System.out.println(Integer.toString(errorCount) +
2957 " inconsistencies detected.");
2958 if (errorCount == 0) {
2959 System.out.println("Status: OK");
2960 return 0;
2961 } else {
2962 System.out.println("Status: INCONSISTENT");
2963 return -1;
2967 @Override
2968 public ArrayList<ERROR_CODE> getErrorList() {
2969 return errorList;
2972 @Override
2973 public synchronized void print(String message) {
2974 if (!summary) {
2975 System.out.println(message);
2979 @Override
2980 public boolean tableHasErrors(HbckTableInfo table) {
2981 return errorTables.contains(table);
2984 @Override
2985 public void resetErrors() {
2986 errorCount = 0;
2989 @Override
2990 public synchronized void detail(String message) {
2991 if (details) {
2992 System.out.println(message);
2994 showProgress = 0;
2997 @Override
2998 public synchronized void progress() {
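// Emit a single '.' once every progressThreshold calls (unless running in summary mode),
// then reset the counter so the dots keep a steady cadence.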
2999 if (showProgress++ == progressThreshold) {
3000 if (!summary) {
3001 System.out.print(".");
3003 showProgress = 0;
3009 * Contact a region server and get all information from it
3011 static class WorkItemRegion implements Callable<Void> {
3012 private final HBaseFsck hbck;
3013 private final ServerName rsinfo;
3014 private final HbckErrorReporter errors;
3015 private final Connection connection;
3017 WorkItemRegion(HBaseFsck hbck, ServerName info, HbckErrorReporter errors,
3018 Connection connection) {
3019 this.hbck = hbck;
3020 this.rsinfo = info;
3021 this.errors = errors;
3022 this.connection = connection;
3025 @Override
3026 public synchronized Void call() throws IOException {
3027 errors.progress();
3028 try {
3029 // list all online regions from this region server
3030 List<RegionInfo> regions = connection.getAdmin().getRegions(rsinfo);
3031 regions = filterRegions(regions);
3033 if (details) {
3034 errors.detail(
3035 "RegionServer: " + rsinfo.getServerName() + " number of regions: " + regions.size());
3036 for (RegionInfo rinfo : regions) {
3037 errors.detail(" " + rinfo.getRegionNameAsString() + " id: " + rinfo.getRegionId() +
3038 " encoded_name: " + rinfo.getEncodedName() + " start: " +
3039 Bytes.toStringBinary(rinfo.getStartKey()) + " end: " +
3040 Bytes.toStringBinary(rinfo.getEndKey()));
3044 // check to see if the existence of this region matches the region in META
3045 for (RegionInfo r : regions) {
3046 HbckRegionInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
3047 hbi.addServer(r, rsinfo);
3049 } catch (IOException e) { // unable to connect to the region server.
3050 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE,
3051 "RegionServer: " + rsinfo.getServerName() + " Unable to fetch region information. " + e);
3052 throw e;
3054 return null;
3057 private List<RegionInfo> filterRegions(List<RegionInfo> regions) {
3058 List<RegionInfo> ret = Lists.newArrayList();
3059 for (RegionInfo hri : regions) {
3060 if (hri.isMetaRegion() || (!hbck.checkMetaOnly
3061 && hbck.isTableIncluded(hri.getTable()))) {
3062 ret.add(hri);
3065 return ret;
3070 * Contact hdfs and gather all information about the specified table directory into the
3071 * regioninfo list.
3073 class WorkItemHdfsDir implements Callable<Void> {
3074 private FileStatus tableDir;
3075 private HbckErrorReporter errors;
3076 private FileSystem fs;
3078 WorkItemHdfsDir(FileSystem fs, HbckErrorReporter errors, FileStatus status) {
3079 this.fs = fs;
3080 this.tableDir = status;
3081 this.errors = errors;
3084 @Override
3085 public synchronized Void call() throws InterruptedException, ExecutionException {
3086 final Vector<Exception> exceptions = new Vector<>();
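// Vector is used for its synchronized add(): the region-dir tasks submitted below run
// concurrently on the executor and may record failures from multiple threads.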
3088 try {
3089 final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
3090 final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
3092 for (final FileStatus regionDir : regionDirs) {
3093 errors.progress();
3094 final String encodedName = regionDir.getPath().getName();
3095 // ignore directories that aren't hexadecimal
3096 if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
3097 continue;
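// Stop submitting new region-dir tasks as soon as any previously submitted task has failed.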
3100 if (!exceptions.isEmpty()) {
3101 break;
3104 futures.add(executor.submit(new Runnable() {
3105 @Override
3106 public void run() {
3107 try {
3108 LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
3110 Path regioninfoFile = new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
3111 boolean regioninfoFileExists = fs.exists(regioninfoFile);
3113 if (!regioninfoFileExists) {
3114 // As tables grow, it becomes increasingly likely that by the time we
3115 // reach a given region dir it has already been removed by a split or merge.
3116 if (!fs.exists(regionDir.getPath())) {
3117 LOG.warn("By the time we tried to process this region dir it was already gone: "
3118 + regionDir.getPath());
3119 return;
3123 HbckRegionInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
3124 HbckRegionInfo.HdfsEntry he = new HbckRegionInfo.HdfsEntry();
3125 synchronized (hbi) {
3126 if (hbi.getHdfsRegionDir() != null) {
3127 errors.print("Directory " + encodedName + " duplicate??" +
3128 hbi.getHdfsRegionDir());
3131 he.regionDir = regionDir.getPath();
3132 he.regionDirModTime = regionDir.getModificationTime();
3133 he.hdfsRegioninfoFilePresent = regioninfoFileExists;
3134 // we add to orphan list when we attempt to read .regioninfo
3136 // Set a flag if this region contains only edits
3137 // This is special case if a region is left after split
3138 he.hdfsOnlyEdits = true;
3139 FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
3140 Path ePath = WALSplitUtil.getRegionDirRecoveredEditsDir(regionDir.getPath());
3141 for (FileStatus subDir : subDirs) {
3142 errors.progress();
3143 String sdName = subDir.getPath().getName();
3144 if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
3145 he.hdfsOnlyEdits = false;
3146 break;
3149 hbi.setHdfsEntry(he);
3151 } catch (Exception e) {
3152 LOG.error("Could not load region dir", e);
3153 exceptions.add(e);
3156 }));
3159 // Ensure all pending tasks are complete (or that we run into an exception)
3160 for (Future<?> f : futures) {
3161 if (!exceptions.isEmpty()) {
3162 break;
3164 try {
3165 f.get();
3166 } catch (ExecutionException e) {
3167 LOG.error("Unexpected exec exception! Should've been caught already. (Bug?)", e);
3168 // Shouldn't happen, we already logged/caught any exceptions in the Runnable
3171 } catch (IOException e) {
3172 LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
3173 exceptions.add(e);
3174 } finally {
3175 if (!exceptions.isEmpty()) {
3176 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
3177 + tableDir.getPath().getName()
3178 + " Unable to fetch all HDFS region information. ");
3179 // Just throw the first exception as an indication something bad happened
3180 // Don't need to propagate all the exceptions, we already logged them all anyway
3181 throw new ExecutionException("First exception in WorkItemHdfsDir", exceptions.firstElement());
3184 return null;
3189 * Contact hdfs and load the .regioninfo for the specified region into its
3190 * HbckRegionInfo entry.
3192 static class WorkItemHdfsRegionInfo implements Callable<Void> {
3193 private HbckRegionInfo hbi;
3194 private HBaseFsck hbck;
3195 private HbckErrorReporter errors;
3197 WorkItemHdfsRegionInfo(HbckRegionInfo hbi, HBaseFsck hbck, HbckErrorReporter errors) {
3198 this.hbi = hbi;
3199 this.hbck = hbck;
3200 this.errors = errors;
3203 @Override
3204 public synchronized Void call() throws IOException {
3205 // only load entries that haven't been loaded yet.
3206 if (hbi.getHdfsHRI() == null) {
3207 try {
3208 errors.progress();
3209 hbi.loadHdfsRegioninfo(hbck.getConf());
3210 } catch (IOException ioe) {
3211 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
3212 + hbi.getTableName() + " in hdfs dir "
3213 + hbi.getHdfsRegionDir()
3214 + "! It may be an invalid format or version file. Treating as "
3215 + "an orphaned regiondir.";
3216 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
3217 try {
3218 hbck.debugLsr(hbi.getHdfsRegionDir());
3219 } catch (IOException ioe2) {
3220 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
3221 throw ioe2;
3223 hbck.orphanHdfsDirs.add(hbi);
3224 throw ioe;
3227 return null;
3232 * Display the full report from fsck. This displays all live and dead region
3233 * servers, and all known regions.
3235 public static void setDisplayFullReport() {
3236 details = true;
3239 public static boolean shouldDisplayFullReport() {
3240 return details;
3244 * Set exclusive mode.
3246 public static void setForceExclusive() {
3247 forceExclusive = true;
3251 * Only one instance of hbck can modify HBase at a time.
3253 public boolean isExclusive() {
3254 return fixAny || forceExclusive;
3258 * Set summary mode.
3259 * Print only summary of the tables and status (OK or INCONSISTENT)
3261 static void setSummary() {
3262 summary = true;
3266 * Set hbase:meta check mode.
3267 * Print only info about hbase:meta table deployment/state
3269 void setCheckMetaOnly() {
3270 checkMetaOnly = true;
3274 * Set region boundaries check mode.
3276 void setRegionBoundariesCheck() {
3277 checkRegionBoundaries = true;
3281 * Set replication fix mode.
3283 public void setFixReplication(boolean shouldFix) {
3284 fixReplication = shouldFix;
3285 fixAny |= shouldFix;
3288 public void setCleanReplicationBarrier(boolean shouldClean) {
3289 cleanReplicationBarrier = shouldClean;
3293 * Check if we should rerun fsck again. This checks if we've tried to
3294 * fix something, in which case the fsck tool should be run again.
3298 void setShouldRerun() {
3299 rerun = true;
3302 public boolean shouldRerun() {
3303 return rerun;
3307 * Fix inconsistencies found by fsck. This should try to fix errors (if any)
3308 * found by fsck utility.
3310 public void setFixAssignments(boolean shouldFix) {
3311 fixAssignments = shouldFix;
3312 fixAny |= shouldFix;
3315 boolean shouldFixAssignments() {
3316 return fixAssignments;
3319 public void setFixMeta(boolean shouldFix) {
3320 fixMeta = shouldFix;
3321 fixAny |= shouldFix;
3324 boolean shouldFixMeta() {
3325 return fixMeta;
3328 public void setFixEmptyMetaCells(boolean shouldFix) {
3329 fixEmptyMetaCells = shouldFix;
3330 fixAny |= shouldFix;
3333 boolean shouldFixEmptyMetaCells() {
3334 return fixEmptyMetaCells;
3337 public void setCheckHdfs(boolean checking) {
3338 checkHdfs = checking;
3341 boolean shouldCheckHdfs() {
3342 return checkHdfs;
3345 public void setFixHdfsHoles(boolean shouldFix) {
3346 fixHdfsHoles = shouldFix;
3347 fixAny |= shouldFix;
3350 boolean shouldFixHdfsHoles() {
3351 return fixHdfsHoles;
3354 public void setFixTableOrphans(boolean shouldFix) {
3355 fixTableOrphans = shouldFix;
3356 fixAny |= shouldFix;
3359 boolean shouldFixTableOrphans() {
3360 return fixTableOrphans;
3363 public void setFixHdfsOverlaps(boolean shouldFix) {
3364 fixHdfsOverlaps = shouldFix;
3365 fixAny |= shouldFix;
3368 boolean shouldFixHdfsOverlaps() {
3369 return fixHdfsOverlaps;
3372 public void setFixHdfsOrphans(boolean shouldFix) {
3373 fixHdfsOrphans = shouldFix;
3374 fixAny |= shouldFix;
3377 boolean shouldFixHdfsOrphans() {
3378 return fixHdfsOrphans;
3381 public void setFixVersionFile(boolean shouldFix) {
3382 fixVersionFile = shouldFix;
3383 fixAny |= shouldFix;
3386 public boolean shouldFixVersionFile() {
3387 return fixVersionFile;
3390 public void setSidelineBigOverlaps(boolean sbo) {
3391 this.sidelineBigOverlaps = sbo;
3394 public boolean shouldSidelineBigOverlaps() {
3395 return sidelineBigOverlaps;
3398 public void setFixSplitParents(boolean shouldFix) {
3399 fixSplitParents = shouldFix;
3400 fixAny |= shouldFix;
3403 public void setRemoveParents(boolean shouldFix) {
3404 removeParents = shouldFix;
3405 fixAny |= shouldFix;
3408 boolean shouldFixSplitParents() {
3409 return fixSplitParents;
3412 boolean shouldRemoveParents() {
3413 return removeParents;
3416 public void setFixReferenceFiles(boolean shouldFix) {
3417 fixReferenceFiles = shouldFix;
3418 fixAny |= shouldFix;
3421 boolean shouldFixReferenceFiles() {
3422 return fixReferenceFiles;
3425 public void setFixHFileLinks(boolean shouldFix) {
3426 fixHFileLinks = shouldFix;
3427 fixAny |= shouldFix;
3430 boolean shouldFixHFileLinks() {
3431 return fixHFileLinks;
3434 public boolean shouldIgnorePreCheckPermission() {
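// The filesystem permission pre-check only matters when some -fix option is set (fixAny);
// read-only runs skip it implicitly.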
3435 return !fixAny || ignorePreCheckPermission;
3438 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
3439 this.ignorePreCheckPermission = ignorePreCheckPermission;
3443 * @param mm maximum number of regions to merge into a single region.
3445 public void setMaxMerge(int mm) {
3446 this.maxMerge = mm;
3449 public int getMaxMerge() {
3450 return maxMerge;
3453 public void setMaxOverlapsToSideline(int mo) {
3454 this.maxOverlapsToSideline = mo;
3457 public int getMaxOverlapsToSideline() {
3458 return maxOverlapsToSideline;
3462 * Only check/fix tables specified by the list.
3463 * An empty list means all tables are included.
3465 boolean isTableIncluded(TableName table) {
3466 return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table);
3469 public void includeTable(TableName table) {
3470 tablesIncluded.add(table);
3473 Set<TableName> getIncludedTables() {
3474 return new HashSet<>(tablesIncluded);
3478 * We are interested in only those tables that have not changed their state in
3479 * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag
3480 * @param seconds - the time in seconds
3482 public void setTimeLag(long seconds) {
3483 timelag = seconds * 1000; // convert to milliseconds
3488 * @param sidelineDir - HDFS path to sideline data
3490 public void setSidelineDir(String sidelineDir) {
3491 this.sidelineDir = new Path(sidelineDir);
3494 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
3495 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
3498 public HFileCorruptionChecker getHFilecorruptionChecker() {
3499 return hfcc;
3502 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
3503 this.hfcc = hfcc;
3506 public void setRetCode(int code) {
3507 this.retcode = code;
3510 public int getRetCode() {
3511 return retcode;
3514 protected HBaseFsck printUsageAndExit() {
3515 StringWriter sw = new StringWriter(2048);
3516 PrintWriter out = new PrintWriter(sw);
3517 out.println("");
3518 out.println("-----------------------------------------------------------------------");
3519 out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
3520 out.println("In general, all Read-Only options are supported and can be be used");
3521 out.println("safely. Most -fix/ -repair options are NOT supported. Please see usage");
3522 out.println("below for details on which options are not supported.");
3523 out.println("-----------------------------------------------------------------------");
3524 out.println("");
3525 out.println("Usage: fsck [opts] {only tables}");
3526 out.println(" where [opts] are:");
3527 out.println(" -help Display help options (this)");
3528 out.println(" -details Display full report of all regions.");
3529 out.println(" -timelag <timeInSeconds> Process only regions that " +
3530 " have not experienced any metadata updates in the last " +
3531 " <timeInSeconds> seconds.");
3532 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
3533 " before checking if the fix worked if run with -fix");
3534 out.println(" -summary Print only summary of the tables and status.");
3535 out.println(" -metaonly Only check the state of the hbase:meta table.");
3536 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
3537 out.println(" -boundaries Verify that regions boundaries are the same between META and store files.");
3538 out.println(" -exclusive Abort if another hbck is exclusive or fixing.");
3540 out.println("");
3541 out.println(" Datafile Repair options: (expert features, use with caution!)");
3542 out.println(" -checkCorruptHFiles Check all Hfiles by opening them to make sure they are valid");
3543 out.println(" -sidelineCorruptHFiles Quarantine corrupted HFiles. implies -checkCorruptHFiles");
3545 out.println("");
3546 out.println(" Replication options");
3547 out.println(" -fixReplication Deletes replication queues for removed peers");
3549 out.println("");
3550 out.println(" Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
3551 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
3552 out.println(" -fixReferenceFiles Try to offline lingering reference store files");
3553 out.println(" -fixHFileLinks Try to offline lingering HFileLinks");
3554 out.println(" -noHdfsChecking Don't load/check region info from HDFS."
3555 + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
3556 out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
3558 out.println("");
3559 out.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
3560 out.println("");
3561 out.println(" UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
3562 out.println(" -fix Try to fix region assignments. This is for backwards compatibility");
3563 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
3564 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
3565 out.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
3566 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
3567 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
3568 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
3569 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
3570 out.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
3571 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
3572 out.println(" -fixSplitParents Try to force offline split parents to be online.");
3573 out.println(" -removeParents Try to offline and sideline lingering parents and keep daughter regions.");
3574 out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
3575 + " (empty REGIONINFO_QUALIFIER rows)");
3577 out.println("");
3578 out.println(" UNSUPPORTED Metadata Repair shortcuts");
3579 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
3580 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles" +
3581 "-fixHFileLinks");
3582 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
3583 out.println("");
3584 out.println(" Replication options");
3585 out.println(" -fixReplication Deletes replication queues for removed peers");
3586 out.println(" -cleanReplicationBrarier [tableName] clean the replication barriers " +
3587 "of a specified table, tableName is required");
3588 out.flush();
3589 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
3591 setRetCode(-2);
3592 return this;
3596 * Main program
3598 * @param args
3599 * @throws Exception
3601 public static void main(String[] args) throws Exception {
3602 // create a fsck object
3603 Configuration conf = HBaseConfiguration.create();
3604 Path hbasedir = CommonFSUtils.getRootDir(conf);
3605 URI defaultFs = hbasedir.getFileSystem(conf).getUri();
3606 CommonFSUtils.setFsDefault(conf, new Path(defaultFs));
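// Point the default filesystem at the one backing the HBase root dir before running the tool,
// so paths resolve against the cluster's root filesystem.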
3607 int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
3608 System.exit(ret);
3612 * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
3614 static class HBaseFsckTool extends Configured implements Tool {
3615 HBaseFsckTool(Configuration conf) { super(conf); }
3616 @Override
3617 public int run(String[] args) throws Exception {
3618 HBaseFsck hbck = new HBaseFsck(getConf());
3619 hbck.exec(hbck.executor, args);
3620 hbck.close();
3621 return hbck.getRetCode();
3625 public HBaseFsck exec(ExecutorService exec, String[] args)
3626 throws KeeperException, IOException, InterruptedException, ReplicationException {
3627 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
3629 boolean checkCorruptHFiles = false;
3630 boolean sidelineCorruptHFiles = false;
3632 // Process command-line args.
3633 for (int i = 0; i < args.length; i++) {
3634 String cmd = args[i];
3635 if (cmd.equals("-help") || cmd.equals("-h")) {
3636 return printUsageAndExit();
3637 } else if (cmd.equals("-details")) {
3638 setDisplayFullReport();
3639 } else if (cmd.equals("-exclusive")) {
3640 setForceExclusive();
3641 } else if (cmd.equals("-timelag")) {
3642 if (i == args.length - 1) {
3643 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
3644 return printUsageAndExit();
3646 try {
3647 long timelag = Long.parseLong(args[++i]);
3648 setTimeLag(timelag);
3649 } catch (NumberFormatException e) {
3650 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
3651 return printUsageAndExit();
3653 } else if (cmd.equals("-sleepBeforeRerun")) {
3654 if (i == args.length - 1) {
3655 errors.reportError(ERROR_CODE.WRONG_USAGE,
3656 "HBaseFsck: -sleepBeforeRerun needs a value.");
3657 return printUsageAndExit();
3659 try {
3660 sleepBeforeRerun = Long.parseLong(args[++i]);
3661 } catch (NumberFormatException e) {
3662 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
3663 return printUsageAndExit();
3665 } else if (cmd.equals("-sidelineDir")) {
3666 if (i == args.length - 1) {
3667 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
3668 return printUsageAndExit();
3670 setSidelineDir(args[++i]);
3671 } else if (cmd.equals("-fix")) {
3672 errors.reportError(ERROR_CODE.WRONG_USAGE,
3673 "This option is deprecated, please use -fixAssignments instead.");
3674 setFixAssignments(true);
3675 } else if (cmd.equals("-fixAssignments")) {
3676 setFixAssignments(true);
3677 } else if (cmd.equals("-fixMeta")) {
3678 setFixMeta(true);
3679 } else if (cmd.equals("-noHdfsChecking")) {
3680 setCheckHdfs(false);
3681 } else if (cmd.equals("-fixHdfsHoles")) {
3682 setFixHdfsHoles(true);
3683 } else if (cmd.equals("-fixHdfsOrphans")) {
3684 setFixHdfsOrphans(true);
3685 } else if (cmd.equals("-fixTableOrphans")) {
3686 setFixTableOrphans(true);
3687 } else if (cmd.equals("-fixHdfsOverlaps")) {
3688 setFixHdfsOverlaps(true);
3689 } else if (cmd.equals("-fixVersionFile")) {
3690 setFixVersionFile(true);
3691 } else if (cmd.equals("-sidelineBigOverlaps")) {
3692 setSidelineBigOverlaps(true);
3693 } else if (cmd.equals("-fixSplitParents")) {
3694 setFixSplitParents(true);
3695 } else if (cmd.equals("-removeParents")) {
3696 setRemoveParents(true);
3697 } else if (cmd.equals("-ignorePreCheckPermission")) {
3698 setIgnorePreCheckPermission(true);
3699 } else if (cmd.equals("-checkCorruptHFiles")) {
3700 checkCorruptHFiles = true;
3701 } else if (cmd.equals("-sidelineCorruptHFiles")) {
3702 sidelineCorruptHFiles = true;
3703 } else if (cmd.equals("-fixReferenceFiles")) {
3704 setFixReferenceFiles(true);
3705 } else if (cmd.equals("-fixHFileLinks")) {
3706 setFixHFileLinks(true);
3707 } else if (cmd.equals("-fixEmptyMetaCells")) {
3708 setFixEmptyMetaCells(true);
3709 } else if (cmd.equals("-repair")) {
3710 // this attempts to merge overlapping hdfs regions, needs testing
3711 // under load
3712 setFixHdfsHoles(true);
3713 setFixHdfsOrphans(true);
3714 setFixMeta(true);
3715 setFixAssignments(true);
3716 setFixHdfsOverlaps(true);
3717 setFixVersionFile(true);
3718 setSidelineBigOverlaps(true);
3719 setFixSplitParents(false);
3720 setCheckHdfs(true);
3721 setFixReferenceFiles(true);
3722 setFixHFileLinks(true);
3723 } else if (cmd.equals("-repairHoles")) {
3724 // this will make all missing hdfs regions available but may lose data
3725 setFixHdfsHoles(true);
3726 setFixHdfsOrphans(false);
3727 setFixMeta(true);
3728 setFixAssignments(true);
3729 setFixHdfsOverlaps(false);
3730 setSidelineBigOverlaps(false);
3731 setFixSplitParents(false);
3732 setCheckHdfs(true);
3733 } else if (cmd.equals("-maxOverlapsToSideline")) {
3734 if (i == args.length - 1) {
3735 errors.reportError(ERROR_CODE.WRONG_USAGE,
3736 "-maxOverlapsToSideline needs a numeric value argument.");
3737 return printUsageAndExit();
3739 try {
3740 int maxOverlapsToSideline = Integer.parseInt(args[++i]);
3741 setMaxOverlapsToSideline(maxOverlapsToSideline);
3742 } catch (NumberFormatException e) {
3743 errors.reportError(ERROR_CODE.WRONG_USAGE,
3744 "-maxOverlapsToSideline needs a numeric value argument.");
3745 return printUsageAndExit();
3747 } else if (cmd.equals("-maxMerge")) {
3748 if (i == args.length - 1) {
3749 errors.reportError(ERROR_CODE.WRONG_USAGE,
3750 "-maxMerge needs a numeric value argument.");
3751 return printUsageAndExit();
3753 try {
3754 int maxMerge = Integer.parseInt(args[++i]);
3755 setMaxMerge(maxMerge);
3756 } catch (NumberFormatException e) {
3757 errors.reportError(ERROR_CODE.WRONG_USAGE,
3758 "-maxMerge needs a numeric value argument.");
3759 return printUsageAndExit();
3761 } else if (cmd.equals("-summary")) {
3762 setSummary();
3763 } else if (cmd.equals("-metaonly")) {
3764 setCheckMetaOnly();
3765 } else if (cmd.equals("-boundaries")) {
3766 setRegionBoundariesCheck();
3767 } else if (cmd.equals("-fixReplication")) {
3768 setFixReplication(true);
3769 } else if (cmd.equals("-cleanReplicationBarrier")) {
3770 setCleanReplicationBarrier(true);
3771 if (args[++i].startsWith("-")) {
3772 printUsageAndExit();
3774 setCleanReplicationBarrierTable(args[i]);
3775 } else if (cmd.startsWith("-")) {
3776 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
3777 return printUsageAndExit();
3778 } else {
3779 includeTable(TableName.valueOf(cmd));
3780 errors.print("Allow checking/fixes for table: " + cmd);
3784 errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
3786 // pre-check current user has FS write permission or not
3787 try {
3788 preCheckPermission();
3789 } catch (IOException ioe) {
3790 Runtime.getRuntime().exit(-1);
3793 // do the real work of hbck
3794 connect();
3796 // after connecting to server above, we have server version
3797 // check if unsupported option is specified based on server version
3798 if (!isOptionsSupported(args)) {
3799 return printUsageAndExit();
3802 try {
3803 // if corrupt file mode is on, first fix them since they may be opened later
3804 if (checkCorruptHFiles || sidelineCorruptHFiles) {
3805 LOG.info("Checking all hfiles for corruption");
3806 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
3807 setHFileCorruptionChecker(hfcc); // so we can get result
3808 Collection<TableName> tables = getIncludedTables();
3809 Collection<Path> tableDirs = new ArrayList<>();
3810 Path rootdir = CommonFSUtils.getRootDir(getConf());
3811 if (tables.size() > 0) {
3812 for (TableName t : tables) {
3813 tableDirs.add(CommonFSUtils.getTableDir(rootdir, t));
3815 } else {
3816 tableDirs = FSUtils.getTableDirs(CommonFSUtils.getCurrentFileSystem(getConf()), rootdir);
3818 hfcc.checkTables(tableDirs);
3819 hfcc.report(errors);
3822 // check and fix table integrity, region consistency.
3823 int code = onlineHbck();
3824 setRetCode(code);
3825 // If we have changed the HBase state it is better to run hbck again
3826 // to see if we haven't broken something else in the process.
3827 // We run it only once more because otherwise we can easily fall into
3828 // an infinite loop.
3829 if (shouldRerun()) {
3830 try {
3831 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
3832 Thread.sleep(sleepBeforeRerun);
3833 } catch (InterruptedException ie) {
3834 LOG.warn("Interrupted while sleeping");
3835 return this;
3837 // Just report
3838 setFixAssignments(false);
3839 setFixMeta(false);
3840 setFixHdfsHoles(false);
3841 setFixHdfsOverlaps(false);
3842 setFixVersionFile(false);
3843 setFixTableOrphans(false);
3844 errors.resetErrors();
3845 code = onlineHbck();
3846 setRetCode(code);
3848 } finally {
3849 IOUtils.closeQuietly(this, e -> LOG.warn("", e));
3851 return this;
3854 private boolean isOptionsSupported(String[] args) {
3855 boolean result = true;
3856 String hbaseServerVersion = status.getHBaseVersion();
3857 if (VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0) {
3858 // Process command-line args.
3859 for (String arg : args) {
3860 if (unsupportedOptionsInV2.contains(arg)) {
3861 errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
3862 "option '" + arg + "' is not " + "supported!");
3863 result = false;
3864 break;
3868 return result;
3871 public void setCleanReplicationBarrierTable(String cleanReplicationBarrierTable) {
3872 this.cleanReplicationBarrierTable = TableName.valueOf(cleanReplicationBarrierTable);
3875 public void cleanReplicationBarrier() throws IOException {
3876 if (!cleanReplicationBarrier || cleanReplicationBarrierTable == null) {
3877 return;
3879 if (cleanReplicationBarrierTable.isSystemTable()) {
3880 errors.reportError(ERROR_CODE.INVALID_TABLE,
3881 "invalid table: " + cleanReplicationBarrierTable);
3882 return;
3885 boolean isGlobalScope = false;
3886 try {
3887 isGlobalScope = admin.getDescriptor(cleanReplicationBarrierTable).hasGlobalReplicationScope();
3888 } catch (TableNotFoundException e) {
3889 LOG.info("we may need to clean some erroneous data due to bugs");
3892 if (isGlobalScope) {
3893 errors.reportError(ERROR_CODE.INVALID_TABLE,
3894 "table's replication scope is global: " + cleanReplicationBarrierTable);
3895 return;
3897 List<byte[]> regionNames = new ArrayList<>();
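// Scan only this table's rows in hbase:meta, reading just the rep_barrier family, to collect
// the region names that still have replication barrier entries.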
3898 Scan barrierScan = new Scan();
3899 barrierScan.setCaching(100);
3900 barrierScan.addFamily(HConstants.REPLICATION_BARRIER_FAMILY);
3901 barrierScan
3902 .withStartRow(ClientMetaTableAccessor.getTableStartRowForMeta(cleanReplicationBarrierTable,
3903 ClientMetaTableAccessor.QueryType.REGION))
3904 .withStopRow(ClientMetaTableAccessor.getTableStopRowForMeta(cleanReplicationBarrierTable,
3905 ClientMetaTableAccessor.QueryType.REGION));
3906 Result result;
3907 try (ResultScanner scanner = meta.getScanner(barrierScan)) {
3908 while ((result = scanner.next()) != null) {
3909 regionNames.add(result.getRow());
3912 if (regionNames.size() <= 0) {
3913 errors.reportError(ERROR_CODE.INVALID_TABLE,
3914 "there is no barriers of this table: " + cleanReplicationBarrierTable);
3915 return;
3917 ReplicationQueueStorage queueStorage =
3918 ReplicationStorageFactory.getReplicationQueueStorage(zkw, getConf());
3919 List<ReplicationPeerDescription> peerDescriptions = admin.listReplicationPeers();
3920 if (peerDescriptions != null && peerDescriptions.size() > 0) {
3921 List<String> peers = peerDescriptions.stream()
3922 .filter(peerConfig -> peerConfig.getPeerConfig()
3923 .needToReplicate(cleanReplicationBarrierTable))
3924 .map(peerConfig -> peerConfig.getPeerId()).collect(Collectors.toList());
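// For every peer that replicates this table, clear the last pushed sequence ids of the
// affected regions, flushing in batches of 100 to keep each storage update small.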
3925 try {
3926 List<String> batch = new ArrayList<>();
3927 for (String peer : peers) {
3928 for (byte[] regionName : regionNames) {
3929 batch.add(RegionInfo.encodeRegionName(regionName));
3930 if (batch.size() % 100 == 0) {
3931 queueStorage.removeLastSequenceIds(peer, batch);
3932 batch.clear();
3935 if (batch.size() > 0) {
3936 queueStorage.removeLastSequenceIds(peer, batch);
3937 batch.clear();
3940 } catch (ReplicationException re) {
3941 throw new IOException(re);
3944 for (byte[] regionName : regionNames) {
3945 meta.delete(new Delete(regionName).addFamily(HConstants.REPLICATION_BARRIER_FAMILY));
3947 setShouldRerun();
3951 * ls -r for debugging purposes
3953 void debugLsr(Path p) throws IOException {
3954 debugLsr(getConf(), p, errors);
3958 * ls -r for debugging purposes
3960 public static void debugLsr(Configuration conf,
3961 Path p) throws IOException {
3962 debugLsr(conf, p, new PrintingErrorReporter());
3966 * ls -r for debugging purposes
3968 public static void debugLsr(Configuration conf,
3969 Path p, HbckErrorReporter errors) throws IOException {
3970 if (!LOG.isDebugEnabled() || p == null) {
3971 return;
3973 FileSystem fs = p.getFileSystem(conf);
3975 if (!fs.exists(p)) {
3976 // nothing
3977 return;
3979 errors.print(p.toString());
3981 if (fs.isFile(p)) {
3982 return;
3985 if (fs.getFileStatus(p).isDirectory()) {
3986 FileStatus[] fss = fs.listStatus(p);
3987 for (FileStatus status : fss) {
3988 debugLsr(conf, status.getPath(), errors);