HBASE-21843 RegionGroupingProvider breaks the meta wal file name pattern which may...
[hbase.git] / hbase-server / src / main / java / org / apache / hadoop / hbase / util / HBaseFsck.java
blob9e5f9e8c9177d309614ac7aaca46ef618bc6d7ef
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase.util;
20 import java.io.Closeable;
21 import java.io.FileNotFoundException;
22 import java.io.IOException;
23 import java.io.InterruptedIOException;
24 import java.io.PrintWriter;
25 import java.io.StringWriter;
26 import java.net.InetAddress;
27 import java.net.URI;
28 import java.util.ArrayList;
29 import java.util.Arrays;
30 import java.util.Collection;
31 import java.util.Collections;
32 import java.util.Comparator;
33 import java.util.EnumSet;
34 import java.util.HashMap;
35 import java.util.HashSet;
36 import java.util.Iterator;
37 import java.util.List;
38 import java.util.Locale;
39 import java.util.Map;
40 import java.util.Map.Entry;
41 import java.util.Objects;
42 import java.util.Optional;
43 import java.util.Set;
44 import java.util.SortedMap;
45 import java.util.SortedSet;
46 import java.util.TreeMap;
47 import java.util.TreeSet;
48 import java.util.Vector;
49 import java.util.concurrent.Callable;
50 import java.util.concurrent.ConcurrentSkipListMap;
51 import java.util.concurrent.ExecutionException;
52 import java.util.concurrent.ExecutorService;
53 import java.util.concurrent.Executors;
54 import java.util.concurrent.Future;
55 import java.util.concurrent.FutureTask;
56 import java.util.concurrent.ScheduledThreadPoolExecutor;
57 import java.util.concurrent.TimeUnit;
58 import java.util.concurrent.TimeoutException;
59 import java.util.concurrent.atomic.AtomicBoolean;
60 import java.util.concurrent.atomic.AtomicInteger;
61 import java.util.stream.Collectors;
62 import org.apache.commons.io.IOUtils;
63 import org.apache.commons.lang3.RandomStringUtils;
64 import org.apache.commons.lang3.StringUtils;
65 import org.apache.hadoop.conf.Configuration;
66 import org.apache.hadoop.conf.Configured;
67 import org.apache.hadoop.fs.FSDataOutputStream;
68 import org.apache.hadoop.fs.FileStatus;
69 import org.apache.hadoop.fs.FileSystem;
70 import org.apache.hadoop.fs.Path;
71 import org.apache.hadoop.fs.permission.FsAction;
72 import org.apache.hadoop.fs.permission.FsPermission;
73 import org.apache.hadoop.hbase.Abortable;
74 import org.apache.hadoop.hbase.Cell;
75 import org.apache.hadoop.hbase.CellUtil;
76 import org.apache.hadoop.hbase.ClusterMetrics;
77 import org.apache.hadoop.hbase.ClusterMetrics.Option;
78 import org.apache.hadoop.hbase.HBaseConfiguration;
79 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
80 import org.apache.hadoop.hbase.HConstants;
81 import org.apache.hadoop.hbase.HRegionInfo;
82 import org.apache.hadoop.hbase.HRegionLocation;
83 import org.apache.hadoop.hbase.KeyValue;
84 import org.apache.hadoop.hbase.MasterNotRunningException;
85 import org.apache.hadoop.hbase.MetaTableAccessor;
86 import org.apache.hadoop.hbase.RegionLocations;
87 import org.apache.hadoop.hbase.ServerName;
88 import org.apache.hadoop.hbase.TableName;
89 import org.apache.hadoop.hbase.TableNotFoundException;
90 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
91 import org.apache.hadoop.hbase.client.Admin;
92 import org.apache.hadoop.hbase.client.ClusterConnection;
93 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
94 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
95 import org.apache.hadoop.hbase.client.Connection;
96 import org.apache.hadoop.hbase.client.ConnectionFactory;
97 import org.apache.hadoop.hbase.client.Delete;
98 import org.apache.hadoop.hbase.client.Get;
99 import org.apache.hadoop.hbase.client.Put;
100 import org.apache.hadoop.hbase.client.RegionInfo;
101 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
102 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
103 import org.apache.hadoop.hbase.client.Result;
104 import org.apache.hadoop.hbase.client.ResultScanner;
105 import org.apache.hadoop.hbase.client.RowMutations;
106 import org.apache.hadoop.hbase.client.Scan;
107 import org.apache.hadoop.hbase.client.Table;
108 import org.apache.hadoop.hbase.client.TableDescriptor;
109 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
110 import org.apache.hadoop.hbase.client.TableState;
111 import org.apache.hadoop.hbase.io.FileLink;
112 import org.apache.hadoop.hbase.io.HFileLink;
113 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
114 import org.apache.hadoop.hbase.io.hfile.HFile;
115 import org.apache.hadoop.hbase.log.HBaseMarkers;
116 import org.apache.hadoop.hbase.master.MasterFileSystem;
117 import org.apache.hadoop.hbase.master.RegionState;
118 import org.apache.hadoop.hbase.regionserver.HRegion;
119 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
120 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
121 import org.apache.hadoop.hbase.replication.ReplicationException;
122 import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
123 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
124 import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
125 import org.apache.hadoop.hbase.replication.ReplicationUtils;
126 import org.apache.hadoop.hbase.security.AccessDeniedException;
127 import org.apache.hadoop.hbase.security.UserProvider;
128 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
129 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
130 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
131 import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
132 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
133 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
134 import org.apache.hadoop.hbase.wal.WAL;
135 import org.apache.hadoop.hbase.wal.WALFactory;
136 import org.apache.hadoop.hbase.wal.WALSplitter;
137 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
138 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
139 import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
140 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
141 import org.apache.hadoop.ipc.RemoteException;
142 import org.apache.hadoop.security.UserGroupInformation;
143 import org.apache.hadoop.util.ReflectionUtils;
144 import org.apache.hadoop.util.Tool;
145 import org.apache.hadoop.util.ToolRunner;
146 import org.apache.yetus.audience.InterfaceAudience;
147 import org.apache.yetus.audience.InterfaceStability;
148 import org.apache.zookeeper.KeeperException;
149 import org.slf4j.Logger;
150 import org.slf4j.LoggerFactory;
152 import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
153 import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
154 import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
155 import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
156 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
157 import org.apache.hbase.thirdparty.com.google.common.collect.Multimap;
158 import org.apache.hbase.thirdparty.com.google.common.collect.Ordering;
159 import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
160 import org.apache.hbase.thirdparty.com.google.common.collect.TreeMultimap;
162 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
163 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
166 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
167 * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. It does not
168 * work with hbase-2.x; it can read state but is not allowed to change state; i.e. effect 'repair'.
169 * See hbck2 (HBASE-19121) for a hbck tool for hbase2.
171 * <p>
172 * Region consistency checks verify that hbase:meta, region deployment on region
173 * servers and the state of data in HDFS (.regioninfo files) all are in
174 * accordance.
175 * <p>
176 * Table integrity checks verify that all possible row keys resolve to exactly
177 * one region of a table. This means there are no individual degenerate
178 * or backwards regions; no holes between regions; and that there are no
179 * overlapping regions.
180 * <p>
181 * The general repair strategy works in two phases:
182 * <ol>
183 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
184 * <li> Repair Region Consistency with hbase:meta and assignments
185 * </ol>
186 * <p>
187 * For table integrity repairs, the tables' region directories are scanned
188 * for .regioninfo files. Each table's integrity is then verified. If there
189 * are any orphan regions (regions with no .regioninfo files) or holes, new
190 * regions are fabricated. Backwards regions are sidelined as well as empty
191 * degenerate (endkey==startkey) regions. If there are any overlapping regions,
192 * a new region is created and all data is merged into the new region.
193 * <p>
194 * Table integrity repairs deal solely with HDFS and could potentially be done
195 * offline -- the hbase region servers or master do not need to be running.
196 * This phase can eventually be used to completely reconstruct the hbase:meta table in
197 * an offline fashion.
198 * <p>
199 * Region consistency requires three conditions -- 1) valid .regioninfo file
200 * present in an HDFS region dir, 2) valid row with .regioninfo data in META,
201 * and 3) the region is deployed only on the regionserver it was assigned to,
202 * with the proper state in the master.
203 * <p>
204 * Region consistency repairs require hbase to be online so that hbck can
205 * contact the HBase master and region servers. The hbck#connect() method must
206 * first be called successfully. Much of the region consistency information
207 * is transient and less risky to repair.
208 * <p>
209 * If hbck is run from the command line, there are a handful of arguments that
210 * can be used to limit the kinds of repairs hbck will do. See the code in
211 * {@link #printUsageAndExit()} for more details.
213 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
214 @InterfaceStability.Evolving
215 public class HBaseFsck extends Configured implements Closeable {
216 public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
217 public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
218 private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
219 private static boolean rsSupportsOffline = true;
220 private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
221 private static final int DEFAULT_MAX_MERGE = 5;
222 private static final String TO_BE_LOADED = "to_be_loaded";
224 * Here is where hbase-1.x used to default the lock for hbck1.
225 * It puts in place a lock when it goes to write/make changes.
227 @VisibleForTesting
228 public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
229 private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
230 private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
231 private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
232 // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
233 // In HADOOP-2.6 and later, the Namenode proxy is created with a custom RetryPolicy for
234 // AlreadyBeingCreatedException, which implies a timeout on this operation of up to
235 // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
236 private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
237 private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
238 private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
239 private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
241 /**********************
242 * Internal resources
243 **********************/
244 private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck.class.getName());
245 private ClusterMetrics status;
246 private ClusterConnection connection;
247 private Admin admin;
248 private Table meta;
249 // threads to do ||izable tasks: retrieve data from regionservers, handle overlapping regions
250 protected ExecutorService executor;
251 private long startMillis = EnvironmentEdgeManager.currentTime();
252 private HFileCorruptionChecker hfcc;
253 private int retcode = 0;
254 private Path HBCK_LOCK_PATH;
255 private FSDataOutputStream hbckOutFd;
256 // This lock is to prevent cleanup of balancer resources twice between
257 // ShutdownHook and the main code. We cleanup only if the connect() is
258 // successful
259 private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
261 // Unsupported options in HBase 2.0+
262 private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
263 "-fixAssignments", "-fixMeta", "-fixHdfsHoles", "-fixHdfsOrphans", "-fixTableOrphans",
264 "-fixHdfsOverlaps", "-sidelineBigOverlaps", "-fixSplitParents", "-removeParents",
265 "-fixEmptyMetaCells", "-repair", "-repairHoles", "-maxOverlapsToSideline", "-maxMerge");
267 /***********
268 * Options
269 ***********/
270 private static boolean details = false; // do we display the full report
271 private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
272 private static boolean forceExclusive = false; // only this hbck can modify HBase
273 private boolean fixAssignments = false; // fix assignment errors?
274 private boolean fixMeta = false; // fix meta errors?
275 private boolean checkHdfs = true; // load and check fs consistency?
276 private boolean fixHdfsHoles = false; // fix fs holes?
277 private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
278 private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
279 private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
280 private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
281 private boolean fixSplitParents = false; // fix lingering split parents
282 private boolean removeParents = false; // remove split parents
283 private boolean fixReferenceFiles = false; // fix lingering reference store file
284 private boolean fixHFileLinks = false; // fix lingering HFileLinks
285 private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
286 private boolean fixReplication = false; // fix undeleted replication queues for removed peer
287 private boolean cleanReplicationBarrier = false; // clean replication barriers of a table
288 private boolean fixAny = false; // Set to true if any of the fix is required.
290 // limit checking/fixes to the listed tables; if empty, attempt to check/fix all tables
291 // hbase:meta is always checked
292 private Set<TableName> tablesIncluded = new HashSet<>();
293 private TableName cleanReplicationBarrierTable;
294 private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
295 // maximum number of overlapping regions to sideline
296 private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE;
297 private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
298 private Path sidelineDir = null;
300 private boolean rerun = false; // if we tried to fix something, rerun hbck
301 private static boolean summary = false; // if we want to print less output
302 private boolean checkMetaOnly = false;
303 private boolean checkRegionBoundaries = false;
304 private boolean ignorePreCheckPermission = false; // if pre-check permission
306 /*********
307 * State
308 *********/
309 final private ErrorReporter errors;
310 int fixes = 0;
313 * This map contains the state of all hbck items. It maps from encoded region
314 * name to HbckInfo structure. The information contained in HbckInfo is used
315 * to detect and correct consistency (hdfs/meta/deployment) problems.
317 private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<>();
318 // Empty regioninfo qualifiers in hbase:meta
319 private Set<Result> emptyRegionInfoQualifiers = new HashSet<>();
322 * This map from Tablename -> TableInfo contains the structures necessary to
323 * detect table consistency problems (holes, dupes, overlaps). It is sorted
324 * to prevent dupes.
326 * If tablesIncluded is empty, this map contains all tables.
327 * Otherwise, it contains only meta tables and tables in tablesIncluded,
328 * unless checkMetaOnly is specified, in which case, it contains only
329 * the meta table
331 private SortedMap<TableName, TableInfo> tablesInfo = new ConcurrentSkipListMap<>();
334 * When initially looking at HDFS, we attempt to find any orphaned data.
336 private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
338 private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
339 private Map<TableName, TableState> tableStates = new HashMap<>();
340 private final RetryCounterFactory lockFileRetryCounterFactory;
341 private final RetryCounterFactory createZNodeRetryCounterFactory;
343 private Map<TableName, Set<String>> skippedRegions = new HashMap<>();
345 private ZKWatcher zkw = null;
346 private String hbckEphemeralNodePath = null;
347 private boolean hbckZodeCreated = false;
/**
 * Creates an hbck instance whose parallel work runs on the pool returned by
 * {@link #createThreadPool(Configuration)}.
 *
 * @param conf Configuration object
 * @throws IOException propagated from the two-argument constructor this one delegates to
 * @throws ClassNotFoundException propagated from the two-argument constructor
 */
public HBaseFsck(Configuration conf) throws IOException, ClassNotFoundException {
  this(conf, createThreadPool(conf));
}
360 private static ExecutorService createThreadPool(Configuration conf) {
361 int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
362 return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
/**
 * Creates an hbck instance that uses the supplied executor for its parallel tasks.
 * Besides storing the executor, this sets up the error reporter, the retry-counter factories
 * for lock-file and znode creation, and the ZooKeeper watcher.
 *
 * @param conf Configuration object
 * @param exec executor used for parallelizable work
 * @throws MasterNotRunningException if the master is not running
 * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
 * @throws IOException declared by the signature; raised by the setup calls made here
 * @throws ClassNotFoundException declared by the signature; raised by the setup calls made here
 */
public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
    ZooKeeperConnectionException, IOException, ClassNotFoundException {
  super(conf);
  this.errors = getErrorReporter(getConf());
  this.executor = exec;
  this.lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
  this.createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
  this.zkw = createZooKeeperWatcher();
}
386 * @return A retry counter factory configured for retrying lock file creation.
388 public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
389 return new RetryCounterFactory(
390 conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
391 conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
392 DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
393 conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
394 DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
398 * @return A retry counter factory configured for retrying znode creation.
400 private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
401 return new RetryCounterFactory(
402 conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
403 conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
404 DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
405 conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
406 DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
410 * @return Return the tmp dir this tool writes too.
412 @VisibleForTesting
413 public static Path getTmpDir(Configuration conf) throws IOException {
414 return new Path(FSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
417 private static class FileLockCallable implements Callable<FSDataOutputStream> {
418 RetryCounter retryCounter;
419 private final Configuration conf;
420 private Path hbckLockPath = null;
422 public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
423 this.retryCounter = retryCounter;
424 this.conf = conf;
428 * @return Will be <code>null</code> unless you call {@link #call()}
430 Path getHbckLockPath() {
431 return this.hbckLockPath;
434 @Override
435 public FSDataOutputStream call() throws IOException {
436 try {
437 FileSystem fs = FSUtils.getCurrentFileSystem(this.conf);
438 FsPermission defaultPerms = FSUtils.getFilePermissions(fs, this.conf,
439 HConstants.DATA_FILE_UMASK_KEY);
440 Path tmpDir = getTmpDir(conf);
441 this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
442 fs.mkdirs(tmpDir);
443 final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
444 out.writeBytes(InetAddress.getLocalHost().toString());
445 // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
446 out.writeBytes(" Written by an hbase-2.x Master to block an " +
447 "attempt by an hbase-1.x HBCK tool making modification to state. " +
448 "See 'HBCK must match HBase server version' in the hbase refguide.");
449 out.flush();
450 return out;
451 } catch(RemoteException e) {
452 if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
453 return null;
454 } else {
455 throw e;
460 private FSDataOutputStream createFileWithRetries(final FileSystem fs,
461 final Path hbckLockFilePath, final FsPermission defaultPerms)
462 throws IOException {
463 IOException exception = null;
464 do {
465 try {
466 return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
467 } catch (IOException ioe) {
468 LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
469 + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
470 + retryCounter.getMaxAttempts());
471 LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
472 ioe);
473 try {
474 exception = ioe;
475 retryCounter.sleepUntilNextRetry();
476 } catch (InterruptedException ie) {
477 throw (InterruptedIOException) new InterruptedIOException(
478 "Can't create lock file " + hbckLockFilePath.getName())
479 .initCause(ie);
482 } while (retryCounter.shouldRetry());
484 throw exception;
489 * This method maintains a lock using a file. If the creation fails we return null
491 * @return FSDataOutputStream object corresponding to the newly opened lock file
492 * @throws IOException if IO failure occurs
494 public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
495 RetryCounter retryCounter) throws IOException {
496 FileLockCallable callable = new FileLockCallable(conf, retryCounter);
497 ExecutorService executor = Executors.newFixedThreadPool(1);
498 FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
499 executor.execute(futureTask);
500 final int timeoutInSeconds = conf.getInt(
501 "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
502 FSDataOutputStream stream = null;
503 try {
504 stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
505 } catch (ExecutionException ee) {
506 LOG.warn("Encountered exception when opening lock file", ee);
507 } catch (InterruptedException ie) {
508 LOG.warn("Interrupted when opening lock file", ie);
509 Thread.currentThread().interrupt();
510 } catch (TimeoutException exception) {
511 // took too long to obtain lock
512 LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
513 futureTask.cancel(true);
514 } finally {
515 executor.shutdownNow();
517 return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
520 private void unlockHbck() {
521 if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
522 RetryCounter retryCounter = lockFileRetryCounterFactory.create();
523 do {
524 try {
525 IOUtils.closeQuietly(hbckOutFd);
526 FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
527 LOG.info("Finishing hbck");
528 return;
529 } catch (IOException ioe) {
530 LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
531 + (retryCounter.getAttemptTimes() + 1) + " of "
532 + retryCounter.getMaxAttempts());
533 LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
534 try {
535 retryCounter.sleepUntilNextRetry();
536 } catch (InterruptedException ie) {
537 Thread.currentThread().interrupt();
538 LOG.warn("Interrupted while deleting lock file" +
539 HBCK_LOCK_PATH);
540 return;
543 } while (retryCounter.shouldRetry());
548 * To repair region consistency, one must call connect() in order to repair
549 * online state.
551 public void connect() throws IOException {
553 if (isExclusive()) {
554 // Grab the lock
555 Pair<Path, FSDataOutputStream> pair =
556 checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
557 HBCK_LOCK_PATH = pair.getFirst();
558 this.hbckOutFd = pair.getSecond();
559 if (hbckOutFd == null) {
560 setRetCode(-1);
561 LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
562 "[If you are sure no other instance is running, delete the lock file " +
563 HBCK_LOCK_PATH + " and rerun the tool]");
564 throw new IOException("Duplicate hbck - Abort");
567 // Make sure to cleanup the lock
568 hbckLockCleanup.set(true);
572 // Add a shutdown hook to this thread, in case user tries to
573 // kill the hbck with a ctrl-c, we want to cleanup the lock so that
574 // it is available for further calls
575 Runtime.getRuntime().addShutdownHook(new Thread() {
576 @Override
577 public void run() {
578 IOUtils.closeQuietly(HBaseFsck.this);
579 cleanupHbckZnode();
580 unlockHbck();
584 LOG.info("Launching hbck");
586 connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
587 admin = connection.getAdmin();
588 meta = connection.getTable(TableName.META_TABLE_NAME);
589 status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS,
590 Option.DEAD_SERVERS, Option.MASTER, Option.BACKUP_MASTERS,
591 Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
595 * Get deployed regions according to the region servers.
597 private void loadDeployedRegions() throws IOException, InterruptedException {
598 // From the master, get a list of all known live region servers
599 Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
600 errors.print("Number of live region servers: " + regionServers.size());
601 if (details) {
602 for (ServerName rsinfo: regionServers) {
603 errors.print(" " + rsinfo.getServerName());
607 // From the master, get a list of all dead region servers
608 Collection<ServerName> deadRegionServers = status.getDeadServerNames();
609 errors.print("Number of dead region servers: " + deadRegionServers.size());
610 if (details) {
611 for (ServerName name: deadRegionServers) {
612 errors.print(" " + name);
616 // Print the current master name and state
617 errors.print("Master: " + status.getMasterName());
619 // Print the list of all backup masters
620 Collection<ServerName> backupMasters = status.getBackupMasterNames();
621 errors.print("Number of backup masters: " + backupMasters.size());
622 if (details) {
623 for (ServerName name: backupMasters) {
624 errors.print(" " + name);
628 errors.print("Average load: " + status.getAverageLoad());
629 errors.print("Number of requests: " + status.getRequestCount());
630 errors.print("Number of regions: " + status.getRegionCount());
632 List<RegionState> rits = status.getRegionStatesInTransition();
633 errors.print("Number of regions in transition: " + rits.size());
634 if (details) {
635 for (RegionState state: rits) {
636 errors.print(" " + state.toDescriptiveString());
640 // Determine what's deployed
641 processRegionServers(regionServers);
645 * Clear the current state of hbck.
647 private void clearState() {
648 // Make sure regionInfo is empty before starting
649 fixes = 0;
650 regionInfoMap.clear();
651 emptyRegionInfoQualifiers.clear();
652 tableStates.clear();
653 errors.clear();
654 tablesInfo.clear();
655 orphanHdfsDirs.clear();
656 skippedRegions.clear();
660 * This repair method analyzes hbase data in hdfs and repairs it to satisfy
661 * the table integrity rules. HBase doesn't need to be online for this
662 * operation to work.
664 public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
665 // Initial pass to fix orphans.
666 if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
667 || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
668 LOG.info("Loading regioninfos HDFS");
669 // if nothing is happening this should always complete in two iterations.
670 int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
671 int curIter = 0;
672 do {
673 clearState(); // clears hbck state and reset fixes to 0 and.
674 // repair what's on HDFS
675 restoreHdfsIntegrity();
676 curIter++;// limit the number of iterations.
677 } while (fixes > 0 && curIter <= maxIterations);
679 // Repairs should be done in the first iteration and verification in the second.
680 // If there are more than 2 passes, something funny has happened.
681 if (curIter > 2) {
682 if (curIter == maxIterations) {
683 LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
684 + "Tables integrity may not be fully repaired!");
685 } else {
686 LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
693 * This repair method requires the cluster to be online since it contacts
694 * region servers and the masters. It makes each region's state in HDFS, in
695 * hbase:meta, and deployments consistent.
697 * @return If &gt; 0 , number of errors detected, if &lt; 0 there was an unrecoverable
698 * error. If 0, we have a clean hbase.
700 public int onlineConsistencyRepair() throws IOException, KeeperException,
701 InterruptedException {
703 // get regions according to what is online on each RegionServer
704 loadDeployedRegions();
705 // check whether hbase:meta is deployed and online
706 recordMetaRegion();
707 // Check if hbase:meta is found only once and in the right place
708 if (!checkMetaRegion()) {
709 String errorMsg = "hbase:meta table is not consistent. ";
710 if (shouldFixAssignments()) {
711 errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
712 } else {
713 errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
715 errors.reportError(errorMsg + " Exiting...");
716 return -2;
718 // Not going with further consistency check for tables when hbase:meta itself is not consistent.
719 LOG.info("Loading regionsinfo from the hbase:meta table");
720 boolean success = loadMetaEntries();
721 if (!success) return -1;
723 // Empty cells in hbase:meta?
724 reportEmptyMetaCells();
726 // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
727 if (shouldFixEmptyMetaCells()) {
728 fixEmptyMetaCells();
731 // get a list of all tables that have not changed recently.
732 if (!checkMetaOnly) {
733 reportTablesInFlux();
736 // Get disabled tables states
737 loadTableStates();
739 // load regiondirs and regioninfos from HDFS
740 if (shouldCheckHdfs()) {
741 LOG.info("Loading region directories from HDFS");
742 loadHdfsRegionDirs();
743 LOG.info("Loading region information from HDFS");
744 loadHdfsRegionInfos();
747 // fix the orphan tables
748 fixOrphanTables();
750 LOG.info("Checking and fixing region consistency");
751 // Check and fix consistency
752 checkAndFixConsistency();
754 // Check integrity (does not fix)
755 checkIntegrity();
756 return errors.getErrorList().size();
760 * This method maintains an ephemeral znode. If the creation fails we return false or throw
761 * exception
763 * @return true if creating znode succeeds; false otherwise
764 * @throws IOException if IO failure occurs
766 private boolean setMasterInMaintenanceMode() throws IOException {
767 RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
768 hbckEphemeralNodePath = ZNodePaths.joinZNode(
769 zkw.getZNodePaths().masterMaintZNode,
770 "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
771 do {
772 try {
773 hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
774 if (hbckZodeCreated) {
775 break;
777 } catch (KeeperException e) {
778 if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
779 throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
781 // fall through and retry
784 LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try=" +
785 (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
787 try {
788 retryCounter.sleepUntilNextRetry();
789 } catch (InterruptedException ie) {
790 throw (InterruptedIOException) new InterruptedIOException(
791 "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
793 } while (retryCounter.shouldRetry());
794 return hbckZodeCreated;
797 private void cleanupHbckZnode() {
798 try {
799 if (zkw != null && hbckZodeCreated) {
800 ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
801 hbckZodeCreated = false;
803 } catch (KeeperException e) {
804 // Ignore
805 if (!e.code().equals(KeeperException.Code.NONODE)) {
806 LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
812 * Contacts the master and prints out cluster-wide information
813 * @return 0 on success, non-zero on failure
815 public int onlineHbck()
816 throws IOException, KeeperException, InterruptedException, ReplicationException {
817 // print hbase server version
818 errors.print("Version: " + status.getHBaseVersion());
820 // Clean start
821 clearState();
822 // Do offline check and repair first
823 offlineHdfsIntegrityRepair();
824 offlineReferenceFileRepair();
825 offlineHLinkFileRepair();
826 // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
827 // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
828 // is better to set Master into maintenance mode during online hbck.
830 if (!setMasterInMaintenanceMode()) {
831 LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
832 + "error. Please run HBCK multiple times to reduce the chance of transient error.");
835 onlineConsistencyRepair();
837 if (checkRegionBoundaries) {
838 checkRegionBoundaries();
841 checkAndFixReplication();
843 cleanReplicationBarrier();
845 // Remove the hbck znode
846 cleanupHbckZnode();
848 // Remove the hbck lock
849 unlockHbck();
851 // Print table summary
852 printTableSummary(tablesInfo);
853 return errors.summarize();
856 public static byte[] keyOnly (byte[] b) {
857 if (b == null)
858 return b;
859 int rowlength = Bytes.toShort(b, 0);
860 byte[] result = new byte[rowlength];
861 System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
862 return result;
865 @Override
866 public void close() throws IOException {
867 try {
868 cleanupHbckZnode();
869 unlockHbck();
870 } catch (Exception io) {
871 LOG.warn(io.toString(), io);
872 } finally {
873 if (zkw != null) {
874 zkw.close();
875 zkw = null;
877 IOUtils.closeQuietly(admin);
878 IOUtils.closeQuietly(meta);
879 IOUtils.closeQuietly(connection);
883 private static class RegionBoundariesInformation {
884 public byte [] regionName;
885 public byte [] metaFirstKey;
886 public byte [] metaLastKey;
887 public byte [] storesFirstKey;
888 public byte [] storesLastKey;
889 @Override
890 public String toString () {
891 return "regionName=" + Bytes.toStringBinary(regionName) +
892 "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
893 "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
894 "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
895 "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
899 public void checkRegionBoundaries() {
900 try {
901 ByteArrayComparator comparator = new ByteArrayComparator();
902 List<RegionInfo> regions = MetaTableAccessor.getAllRegions(connection, true);
903 final RegionBoundariesInformation currentRegionBoundariesInformation =
904 new RegionBoundariesInformation();
905 Path hbaseRoot = FSUtils.getRootDir(getConf());
906 for (RegionInfo regionInfo : regions) {
907 Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
908 currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
909 // For each region, get the start and stop key from the META and compare them to the
910 // same information from the Stores.
911 Path path = new Path(tableDir, regionInfo.getEncodedName());
912 FileSystem fs = path.getFileSystem(getConf());
913 FileStatus[] files = fs.listStatus(path);
914 // For all the column families in this region...
915 byte[] storeFirstKey = null;
916 byte[] storeLastKey = null;
917 for (FileStatus file : files) {
918 String fileName = file.getPath().toString();
919 fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
920 if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
921 FileStatus[] storeFiles = fs.listStatus(file.getPath());
922 // For all the stores in this column family.
923 for (FileStatus storeFile : storeFiles) {
924 HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(),
925 CacheConfig.DISABLED, true, getConf());
926 if ((reader.getFirstKey() != null)
927 && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
928 ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey().get()).getKey()) > 0))) {
929 storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey().get()).getKey();
931 if ((reader.getLastKey() != null)
932 && ((storeLastKey == null) || (comparator.compare(storeLastKey,
933 ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey())) < 0)) {
934 storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey().get()).getKey();
936 reader.close();
940 currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
941 currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
942 currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
943 currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
944 if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
945 currentRegionBoundariesInformation.metaFirstKey = null;
946 if (currentRegionBoundariesInformation.metaLastKey.length == 0)
947 currentRegionBoundariesInformation.metaLastKey = null;
949 // For a region to be correct, we need the META start key to be smaller or equal to the
950 // smallest start key from all the stores, and the start key from the next META entry to
951 // be bigger than the last key from all the current stores. First region start key is null;
952 // Last region end key is null; some regions can be empty and not have any store.
954 boolean valid = true;
955 // Checking start key.
956 if ((currentRegionBoundariesInformation.storesFirstKey != null)
957 && (currentRegionBoundariesInformation.metaFirstKey != null)) {
958 valid = valid
959 && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
960 currentRegionBoundariesInformation.metaFirstKey) >= 0;
962 // Checking stop key.
963 if ((currentRegionBoundariesInformation.storesLastKey != null)
964 && (currentRegionBoundariesInformation.metaLastKey != null)) {
965 valid = valid
966 && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
967 currentRegionBoundariesInformation.metaLastKey) < 0;
969 if (!valid) {
970 errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
971 tablesInfo.get(regionInfo.getTable()));
972 LOG.warn("Region's boundaries not aligned between stores and META for:");
973 LOG.warn(Objects.toString(currentRegionBoundariesInformation));
976 } catch (IOException e) {
977 LOG.error(e.toString(), e);
982 * Iterates through the list of all orphan/invalid regiondirs.
984 private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
985 for (HbckInfo hi : orphanHdfsDirs) {
986 LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
987 adoptHdfsOrphan(hi);
992 * Orphaned regions are regions without a .regioninfo file in them. We "adopt"
993 * these orphans by creating a new region, and moving the column families,
994 * recovered edits, WALs, into the new region dir. We determine the region
995 * startkey and endkeys by looking at all of the hfiles inside the column
996 * families to identify the min and max keys. The resulting region will
997 * likely violate table integrity but will be dealt with by merging
998 * overlapping regions.
1000 @SuppressWarnings("deprecation")
1001 private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
1002 Path p = hi.getHdfsRegionDir();
1003 FileSystem fs = p.getFileSystem(getConf());
1004 FileStatus[] dirs = fs.listStatus(p);
1005 if (dirs == null) {
1006 LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
1007 p + ". This dir could probably be deleted.");
1008 return ;
1011 TableName tableName = hi.getTableName();
1012 TableInfo tableInfo = tablesInfo.get(tableName);
1013 Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
1014 TableDescriptor template = tableInfo.getHTD();
1016 // find min and max key values
1017 Pair<byte[],byte[]> orphanRegionRange = null;
1018 for (FileStatus cf : dirs) {
1019 String cfName= cf.getPath().getName();
1020 // TODO Figure out what the special dirs are
1021 if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
1023 FileStatus[] hfiles = fs.listStatus(cf.getPath());
1024 for (FileStatus hfile : hfiles) {
1025 byte[] start, end;
1026 HFile.Reader hf = null;
1027 try {
1028 hf = HFile.createReader(fs, hfile.getPath(), CacheConfig.DISABLED, true, getConf());
1029 hf.loadFileInfo();
1030 Optional<Cell> startKv = hf.getFirstKey();
1031 start = CellUtil.cloneRow(startKv.get());
1032 Optional<Cell> endKv = hf.getLastKey();
1033 end = CellUtil.cloneRow(endKv.get());
1034 } catch (IOException ioe) {
1035 LOG.warn("Problem reading orphan file " + hfile + ", skipping");
1036 continue;
1037 } catch (NullPointerException ioe) {
1038 LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
1039 continue;
1040 } finally {
1041 if (hf != null) {
1042 hf.close();
1046 // expand the range to include the range of all hfiles
1047 if (orphanRegionRange == null) {
1048 // first range
1049 orphanRegionRange = new Pair<>(start, end);
1050 } else {
1051 // TODO add test
1053 // expand range only if the hfile is wider.
1054 if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1055 orphanRegionRange.setFirst(start);
1057 if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
1058 orphanRegionRange.setSecond(end);
1063 if (orphanRegionRange == null) {
1064 LOG.warn("No data in dir " + p + ", sidelining data");
1065 fixes++;
1066 sidelineRegionDir(fs, hi);
1067 return;
1069 LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
1070 Bytes.toString(orphanRegionRange.getSecond()) + ")");
1072 // create new region on hdfs. move data into place.
1073 RegionInfo regionInfo = RegionInfoBuilder.newBuilder(template.getTableName())
1074 .setStartKey(orphanRegionRange.getFirst())
1075 .setEndKey(Bytes.add(orphanRegionRange.getSecond(), new byte[1]))
1076 .build();
1077 LOG.info("Creating new region : " + regionInfo);
1078 HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), regionInfo, template);
1079 Path target = region.getRegionFileSystem().getRegionDir();
1081 // rename all the data to new region
1082 mergeRegionDirs(target, hi);
1083 fixes++;
1087 * This method determines if there are table integrity errors in HDFS. If
1088 * there are errors and the appropriate "fix" options are enabled, the method
1089 * will first correct orphan regions making them into legit regiondirs, and
1090 * then reload to merge potentially overlapping regions.
1092 * @return number of table integrity errors found
1094 private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1095 // Determine what's on HDFS
1096 LOG.info("Loading HBase regioninfo from HDFS...");
1097 loadHdfsRegionDirs(); // populating regioninfo table.
1099 int errs = errors.getErrorList().size();
1100 // First time just get suggestions.
1101 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1102 checkHdfsIntegrity(false, false);
1104 if (errors.getErrorList().size() == errs) {
1105 LOG.info("No integrity errors. We are done with this phase. Glorious.");
1106 return 0;
1109 if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1110 adoptHdfsOrphans(orphanHdfsDirs);
1111 // TODO optimize by incrementally adding instead of reloading.
1114 // Make sure there are no holes now.
1115 if (shouldFixHdfsHoles()) {
1116 clearState(); // this also resets # fixes.
1117 loadHdfsRegionDirs();
1118 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1119 tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1122 // Now we fix overlaps
1123 if (shouldFixHdfsOverlaps()) {
1124 // second pass we fix overlaps.
1125 clearState(); // this also resets # fixes.
1126 loadHdfsRegionDirs();
1127 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1128 tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1131 return errors.getErrorList().size();
1135 * Scan all the store file names to find any lingering reference files,
1136 * which refer to some none-exiting files. If "fix" option is enabled,
1137 * any lingering reference file will be sidelined if found.
1138 * <p>
1139 * Lingering reference file prevents a region from opening. It has to
1140 * be fixed before a cluster can start properly.
1142 private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1143 clearState();
1144 Configuration conf = getConf();
1145 Path hbaseRoot = FSUtils.getRootDir(conf);
1146 FileSystem fs = hbaseRoot.getFileSystem(conf);
1147 LOG.info("Computing mapping of all store files");
1148 Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1149 new FSUtils.ReferenceFileFilter(fs), executor, errors);
1150 errors.print("");
1151 LOG.info("Validating mapping using HDFS state");
1152 for (Path path: allFiles.values()) {
1153 Path referredToFile = StoreFileInfo.getReferredToFile(path);
1154 if (fs.exists(referredToFile)) continue; // good, expected
1156 // Found a lingering reference file
1157 errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1158 "Found lingering reference file " + path);
1159 if (!shouldFixReferenceFiles()) continue;
1161 // Now, trying to fix it since requested
1162 boolean success = false;
1163 String pathStr = path.toString();
1165 // A reference file path should be like
1166 // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1167 // Up 5 directories to get the root folder.
1168 // So the file will be sidelined to a similar folder structure.
1169 int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1170 for (int i = 0; index > 0 && i < 5; i++) {
1171 index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1173 if (index > 0) {
1174 Path rootDir = getSidelineDir();
1175 Path dst = new Path(rootDir, pathStr.substring(index + 1));
1176 fs.mkdirs(dst.getParent());
1177 LOG.info("Trying to sideline reference file "
1178 + path + " to " + dst);
1179 setShouldRerun();
1181 success = fs.rename(path, dst);
1182 debugLsr(dst);
1185 if (!success) {
1186 LOG.error("Failed to sideline reference file " + path);
1192 * Scan all the store file names to find any lingering HFileLink files,
1193 * which refer to some none-exiting files. If "fix" option is enabled,
1194 * any lingering HFileLink file will be sidelined if found.
1196 private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1197 Configuration conf = getConf();
1198 Path hbaseRoot = FSUtils.getRootDir(conf);
1199 FileSystem fs = hbaseRoot.getFileSystem(conf);
1200 LOG.info("Computing mapping of all link files");
1201 Map<String, Path> allFiles = FSUtils
1202 .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
1203 errors.print("");
1205 LOG.info("Validating mapping using HDFS state");
1206 for (Path path : allFiles.values()) {
1207 // building HFileLink object to gather locations
1208 HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1209 if (actualLink.exists(fs)) continue; // good, expected
1211 // Found a lingering HFileLink
1212 errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1213 if (!shouldFixHFileLinks()) continue;
1215 // Now, trying to fix it since requested
1216 setShouldRerun();
1218 // An HFileLink path should be like
1219 // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
1220 // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1221 boolean success = sidelineFile(fs, hbaseRoot, path);
1223 if (!success) {
1224 LOG.error("Failed to sideline HFileLink file " + path);
1227 // An HFileLink backreference path should be like
1228 // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
1229 // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1230 Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
1231 .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
1232 HFileLink.getReferencedRegionName(path.getName().toString()),
1233 path.getParent().getName()),
1234 HFileLink.getReferencedHFileName(path.getName().toString()));
1235 success = sidelineFile(fs, hbaseRoot, backRefPath);
1237 if (!success) {
1238 LOG.error("Failed to sideline HFileLink backreference file " + path);
1243 private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1244 URI uri = hbaseRoot.toUri().relativize(path.toUri());
1245 if (uri.isAbsolute()) return false;
1246 String relativePath = uri.getPath();
1247 Path rootDir = getSidelineDir();
1248 Path dst = new Path(rootDir, relativePath);
1249 boolean pathCreated = fs.mkdirs(dst.getParent());
1250 if (!pathCreated) {
1251 LOG.error("Failed to create path: " + dst.getParent());
1252 return false;
1254 LOG.info("Trying to sideline file " + path + " to " + dst);
1255 return fs.rename(path, dst);
1259 * TODO -- need to add tests for this.
1261 private void reportEmptyMetaCells() {
1262 errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1263 emptyRegionInfoQualifiers.size());
1264 if (details) {
1265 for (Result r: emptyRegionInfoQualifiers) {
1266 errors.print(" " + r);
1272 * TODO -- need to add tests for this.
1274 private void reportTablesInFlux() {
1275 AtomicInteger numSkipped = new AtomicInteger(0);
1276 TableDescriptor[] allTables = getTables(numSkipped);
1277 errors.print("Number of Tables: " + allTables.length);
1278 if (details) {
1279 if (numSkipped.get() > 0) {
1280 errors.detail("Number of Tables in flux: " + numSkipped.get());
1282 for (TableDescriptor td : allTables) {
1283 errors.detail(" Table: " + td.getTableName() + "\t" +
1284 (td.isReadOnly() ? "ro" : "rw") + "\t" +
1285 (td.isMetaRegion() ? "META" : " ") + "\t" +
1286 " families: " + td.getColumnFamilyCount());
1291 public ErrorReporter getErrors() {
1292 return errors;
1296 * Read the .regioninfo file from the file system. If there is no
1297 * .regioninfo, add it to the orphan hdfs region list.
1299 private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
1300 Path regionDir = hbi.getHdfsRegionDir();
1301 if (regionDir == null) {
1302 if (hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1303 // Log warning only for default/ primary replica with no region dir
1304 LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
1306 return;
1309 if (hbi.hdfsEntry.hri != null) {
1310 // already loaded data
1311 return;
1314 FileSystem fs = FileSystem.get(getConf());
1315 RegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
1316 LOG.debug("RegionInfo read: " + hri.toString());
1317 hbi.hdfsEntry.hri = hri;
1321 * Exception thrown when a integrity repair operation fails in an
1322 * unresolvable way.
1324 public static class RegionRepairException extends IOException {
1325 private static final long serialVersionUID = 1L;
1326 final IOException ioe;
1327 public RegionRepairException(String s, IOException ioe) {
1328 super(s);
1329 this.ioe = ioe;
1334 * Populate hbi's from regionInfos loaded from file system.
1336 private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1337 throws IOException, InterruptedException {
1338 tablesInfo.clear(); // regenerating the data
1339 // generate region split structure
1340 Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1342 // Parallelized read of .regioninfo files.
1343 List<WorkItemHdfsRegionInfo> hbis = new ArrayList<>(hbckInfos.size());
1344 List<Future<Void>> hbiFutures;
1346 for (HbckInfo hbi : hbckInfos) {
1347 WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1348 hbis.add(work);
1351 // Submit and wait for completion
1352 hbiFutures = executor.invokeAll(hbis);
1354 for(int i=0; i<hbiFutures.size(); i++) {
1355 WorkItemHdfsRegionInfo work = hbis.get(i);
1356 Future<Void> f = hbiFutures.get(i);
1357 try {
1358 f.get();
1359 } catch(ExecutionException e) {
1360 LOG.warn("Failed to read .regioninfo file for region " +
1361 work.hbi.getRegionNameAsString(), e.getCause());
1365 Path hbaseRoot = FSUtils.getRootDir(getConf());
1366 FileSystem fs = hbaseRoot.getFileSystem(getConf());
1367 // serialized table info gathering.
1368 for (HbckInfo hbi: hbckInfos) {
1370 if (hbi.getHdfsHRI() == null) {
1371 // was an orphan
1372 continue;
1376 // get table name from hdfs, populate various HBaseFsck tables.
1377 TableName tableName = hbi.getTableName();
1378 if (tableName == null) {
1379 // There was an entry in hbase:meta not in the HDFS?
1380 LOG.warn("tableName was null for: " + hbi);
1381 continue;
1384 TableInfo modTInfo = tablesInfo.get(tableName);
1385 if (modTInfo == null) {
1386 // only executed once per table.
1387 modTInfo = new TableInfo(tableName);
1388 tablesInfo.put(tableName, modTInfo);
1389 try {
1390 TableDescriptor htd =
1391 FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1392 modTInfo.htds.add(htd);
1393 } catch (IOException ioe) {
1394 if (!orphanTableDirs.containsKey(tableName)) {
1395 LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1396 //should only report once for each table
1397 errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1398 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1399 Set<String> columns = new HashSet<>();
1400 orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1404 if (!hbi.isSkipChecks()) {
1405 modTInfo.addRegionInfo(hbi);
1409 loadTableInfosForTablesWithNoRegion();
1410 errors.print("");
1412 return tablesInfo;
1416 * To get the column family list according to the column family dirs
1417 * @param columns
1418 * @param hbi
1419 * @return a set of column families
1420 * @throws IOException
1422 private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1423 Path regionDir = hbi.getHdfsRegionDir();
1424 FileSystem fs = regionDir.getFileSystem(getConf());
1425 FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1426 for (FileStatus subdir : subDirs) {
1427 String columnfamily = subdir.getPath().getName();
1428 columns.add(columnfamily);
1430 return columns;
1434 * To fabricate a .tableinfo file with following contents<br>
1435 * 1. the correct tablename <br>
1436 * 2. the correct colfamily list<br>
1437 * 3. the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
1438 * @throws IOException
1440 private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1441 Set<String> columns) throws IOException {
1442 if (columns ==null || columns.isEmpty()) return false;
1443 TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
1444 for (String columnfamimly : columns) {
1445 builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(columnfamimly));
1447 fstd.createTableDescriptor(builder.build(), true);
1448 return true;
1452 * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br>
1453 * @throws IOException
1455 public void fixEmptyMetaCells() throws IOException {
1456 if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1457 LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1458 for (Result region : emptyRegionInfoQualifiers) {
1459 deleteMetaRegion(region.getRow());
1460 errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1462 emptyRegionInfoQualifiers.clear();
1467 * To fix orphan table by creating a .tableinfo file under tableDir <br>
1468 * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
1469 * 2. else create a default .tableinfo file with following items<br>
1470 * &nbsp;2.1 the correct tablename <br>
1471 * &nbsp;2.2 the correct colfamily list<br>
1472 * &nbsp;2.3 the default properties for both {@link TableDescriptor} and {@link ColumnFamilyDescriptor}<br>
1473 * @throws IOException
1475 public void fixOrphanTables() throws IOException {
1476 if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1478 List<TableName> tmpList = new ArrayList<>(orphanTableDirs.keySet().size());
1479 tmpList.addAll(orphanTableDirs.keySet());
1480 TableDescriptor[] htds = getTableDescriptors(tmpList);
1481 Iterator<Entry<TableName, Set<String>>> iter =
1482 orphanTableDirs.entrySet().iterator();
1483 int j = 0;
1484 int numFailedCase = 0;
1485 FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1486 while (iter.hasNext()) {
1487 Entry<TableName, Set<String>> entry =
1488 iter.next();
1489 TableName tableName = entry.getKey();
1490 LOG.info("Trying to fix orphan table error: " + tableName);
1491 if (j < htds.length) {
1492 if (tableName.equals(htds[j].getTableName())) {
1493 TableDescriptor htd = htds[j];
1494 LOG.info("fixing orphan table: " + tableName + " from cache");
1495 fstd.createTableDescriptor(htd, true);
1496 j++;
1497 iter.remove();
1499 } else {
1500 if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1501 LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1502 LOG.warn("Strongly recommend to modify the TableDescriptor if necessary for: " + tableName);
1503 iter.remove();
1504 } else {
1505 LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1506 numFailedCase++;
1509 fixes++;
1512 if (orphanTableDirs.isEmpty()) {
1513 // all orphanTableDirs are luckily recovered
1514 // re-run doFsck after recovering the .tableinfo file
1515 setShouldRerun();
1516 LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1517 } else if (numFailedCase > 0) {
1518 LOG.error("Failed to fix " + numFailedCase
1519 + " OrphanTables with default .tableinfo files");
1523 //cleanup the list
1524 orphanTableDirs.clear();
1529 * This borrows code from MasterFileSystem.bootstrap(). Explicitly creates it's own WAL, so be
1530 * sure to close it as well as the region when you're finished.
1531 * @param walFactoryID A unique identifier for WAL factory. Filesystem implementations will use
1532 * this ID to make a directory inside WAL directory path.
1533 * @return an open hbase:meta HRegion
1535 private HRegion createNewMeta(String walFactoryID) throws IOException {
1536 Path rootdir = FSUtils.getRootDir(getConf());
1537 Configuration c = getConf();
1538 RegionInfo metaHRI = RegionInfoBuilder.FIRST_META_REGIONINFO;
1539 TableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1540 MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1541 // The WAL subsystem will use the default rootDir rather than the passed in rootDir
1542 // unless I pass along via the conf.
1543 Configuration confForWAL = new Configuration(c);
1544 confForWAL.set(HConstants.HBASE_DIR, rootdir.toString());
1545 WAL wal = new WALFactory(confForWAL, walFactoryID).getWAL(metaHRI);
1546 HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor, wal);
1547 MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1548 return meta;
1552 * Generate set of puts to add to new meta. This expects the tables to be
1553 * clean with no overlaps or holes. If there are any problems it returns null.
1555 * @return An array list of puts to do in bulk, null if tables have problems
1557 private ArrayList<Put> generatePuts(SortedMap<TableName, TableInfo> tablesInfo)
1558 throws IOException {
1559 ArrayList<Put> puts = new ArrayList<>();
1560 boolean hasProblems = false;
1561 for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1562 TableName name = e.getKey();
1564 // skip "hbase:meta"
1565 if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1566 continue;
1569 TableInfo ti = e.getValue();
1570 puts.add(MetaTableAccessor.makePutFromTableState(
1571 new TableState(ti.tableName, TableState.State.ENABLED),
1572 EnvironmentEdgeManager.currentTime()));
1573 for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1574 .entrySet()) {
1575 Collection<HbckInfo> his = spl.getValue();
1576 int sz = his.size();
1577 if (sz != 1) {
1578 // problem
1579 LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1580 + " had " + sz + " regions instead of exactly 1." );
1581 hasProblems = true;
1582 continue;
1585 // add the row directly to meta.
1586 HbckInfo hi = his.iterator().next();
1587 RegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1588 Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
1589 puts.add(p);
1592 return hasProblems ? null : puts;
1596 * Suggest fixes for each table
1598 private void suggestFixes(
1599 SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1600 logParallelMerge();
1601 for (TableInfo tInfo : tablesInfo.values()) {
1602 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1603 tInfo.checkRegionChain(handler);
1608 * Rebuilds meta from information in hdfs/fs. Depends on configuration settings passed into
1609 * hbck constructor to point to a particular fs/dir. Assumes HBase is OFFLINE.
1611 * @param fix flag that determines if method should attempt to fix holes
1612 * @return true if successful, false if attempt failed.
1614 public boolean rebuildMeta(boolean fix) throws IOException,
1615 InterruptedException {
1617 // TODO check to make sure hbase is offline. (or at least the table
1618 // currently being worked on is off line)
1620 // Determine what's on HDFS
1621 LOG.info("Loading HBase regioninfo from HDFS...");
1622 loadHdfsRegionDirs(); // populating regioninfo table.
1624 int errs = errors.getErrorList().size();
1625 tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1626 checkHdfsIntegrity(false, false);
1628 // make sure ok.
1629 if (errors.getErrorList().size() != errs) {
1630 // While in error state, iterate until no more fixes possible
1631 while(true) {
1632 fixes = 0;
1633 suggestFixes(tablesInfo);
1634 errors.clear();
1635 loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1636 checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1638 int errCount = errors.getErrorList().size();
1640 if (fixes == 0) {
1641 if (errCount > 0) {
1642 return false; // failed to fix problems.
1643 } else {
1644 break; // no fixes and no problems? drop out and fix stuff!
1650 // we can rebuild, move old meta out of the way and start
1651 LOG.info("HDFS regioninfo's seems good. Sidelining old hbase:meta");
1652 Path backupDir = sidelineOldMeta();
1654 LOG.info("Creating new hbase:meta");
1655 String walFactoryId = "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8);
1656 HRegion meta = createNewMeta(walFactoryId);
1658 // populate meta
1659 List<Put> puts = generatePuts(tablesInfo);
1660 if (puts == null) {
1661 LOG.error(HBaseMarkers.FATAL, "Problem encountered when creating new hbase:meta "
1662 + "entries. You may need to restore the previously sidelined hbase:meta");
1663 return false;
1665 meta.batchMutate(puts.toArray(new Put[puts.size()]), HConstants.NO_NONCE, HConstants.NO_NONCE);
1666 meta.close();
1667 if (meta.getWAL() != null) {
1668 meta.getWAL().close();
1670 // clean up the temporary hbck meta recovery WAL directory
1671 removeHBCKMetaRecoveryWALDir(walFactoryId);
1672 LOG.info("Success! hbase:meta table rebuilt.");
1673 LOG.info("Old hbase:meta is moved into " + backupDir);
1674 return true;
1678 * Removes the empty Meta recovery WAL directory.
1679 * @param walFactoryId A unique identifier for WAL factory which was used by Filesystem to make a
1680 * Meta recovery WAL directory inside WAL directory path.
1682 private void removeHBCKMetaRecoveryWALDir(String walFactoryId) throws IOException {
1683 Path walLogDir = new Path(new Path(CommonFSUtils.getWALRootDir(getConf()),
1684 HConstants.HREGION_LOGDIR_NAME), walFactoryId);
1685 FileSystem fs = CommonFSUtils.getWALFileSystem(getConf());
1686 FileStatus[] walFiles = FSUtils.listStatus(fs, walLogDir, null);
1687 if (walFiles == null || walFiles.length == 0) {
1688 LOG.info("HBCK meta recovery WAL directory is empty, removing it now.");
1689 if (!FSUtils.deleteDirectory(fs, walLogDir)) {
1690 LOG.warn("Couldn't clear the HBCK Meta recovery WAL directory " + walLogDir);
1696 * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1698 private void logParallelMerge() {
1699 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1700 LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
1701 " false to run serially.");
1702 } else {
1703 LOG.info("Handling overlap merges serially. set hbasefsck.overlap.merge.parallel to" +
1704 " true to run in parallel.");
1708 private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1709 boolean fixOverlaps) throws IOException {
1710 LOG.info("Checking HBase region split map from HDFS data...");
1711 logParallelMerge();
1712 for (TableInfo tInfo : tablesInfo.values()) {
1713 TableIntegrityErrorHandler handler;
1714 if (fixHoles || fixOverlaps) {
1715 handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1716 fixHoles, fixOverlaps);
1717 } else {
1718 handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1720 if (!tInfo.checkRegionChain(handler)) {
1721 // should dump info as well.
1722 errors.report("Found inconsistency in table " + tInfo.getName());
1725 return tablesInfo;
1728 private Path getSidelineDir() throws IOException {
1729 if (sidelineDir == null) {
1730 Path hbaseDir = FSUtils.getRootDir(getConf());
1731 Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1732 sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1733 + startMillis);
1735 return sidelineDir;
1739 * Sideline a region dir (instead of deleting it)
1741 Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1742 return sidelineRegionDir(fs, null, hi);
1746 * Sideline a region dir (instead of deleting it)
1748 * @param parentDir if specified, the region will be sidelined to folder like
1749 * {@literal .../parentDir/<table name>/<region name>}. The purpose is to group together
1750 * similar regions sidelined, for example, those regions should be bulk loaded back later
1751 * on. If NULL, it is ignored.
1753 Path sidelineRegionDir(FileSystem fs,
1754 String parentDir, HbckInfo hi) throws IOException {
1755 TableName tableName = hi.getTableName();
1756 Path regionDir = hi.getHdfsRegionDir();
1758 if (!fs.exists(regionDir)) {
1759 LOG.warn("No previous " + regionDir + " exists. Continuing.");
1760 return null;
1763 Path rootDir = getSidelineDir();
1764 if (parentDir != null) {
1765 rootDir = new Path(rootDir, parentDir);
1767 Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName);
1768 Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1769 fs.mkdirs(sidelineRegionDir);
1770 boolean success = false;
1771 FileStatus[] cfs = fs.listStatus(regionDir);
1772 if (cfs == null) {
1773 LOG.info("Region dir is empty: " + regionDir);
1774 } else {
1775 for (FileStatus cf : cfs) {
1776 Path src = cf.getPath();
1777 Path dst = new Path(sidelineRegionDir, src.getName());
1778 if (fs.isFile(src)) {
1779 // simple file
1780 success = fs.rename(src, dst);
1781 if (!success) {
1782 String msg = "Unable to rename file " + src + " to " + dst;
1783 LOG.error(msg);
1784 throw new IOException(msg);
1786 continue;
1789 // is a directory.
1790 fs.mkdirs(dst);
1792 LOG.info("Sidelining files from " + src + " into containing region " + dst);
1793 // FileSystem.rename is inconsistent with directories -- if the
1794 // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1795 // it moves the src into the dst dir resulting in (foo/a/b). If
1796 // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1797 FileStatus[] hfiles = fs.listStatus(src);
1798 if (hfiles != null && hfiles.length > 0) {
1799 for (FileStatus hfile : hfiles) {
1800 success = fs.rename(hfile.getPath(), dst);
1801 if (!success) {
1802 String msg = "Unable to rename file " + src + " to " + dst;
1803 LOG.error(msg);
1804 throw new IOException(msg);
1808 LOG.debug("Sideline directory contents:");
1809 debugLsr(sidelineRegionDir);
1813 LOG.info("Removing old region dir: " + regionDir);
1814 success = fs.delete(regionDir, true);
1815 if (!success) {
1816 String msg = "Unable to delete dir " + regionDir;
1817 LOG.error(msg);
1818 throw new IOException(msg);
1820 return sidelineRegionDir;
1824 * Side line an entire table.
1826 void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1827 Path backupHbaseDir) throws IOException {
1828 Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1829 if (fs.exists(tableDir)) {
1830 Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName);
1831 fs.mkdirs(backupTableDir.getParent());
1832 boolean success = fs.rename(tableDir, backupTableDir);
1833 if (!success) {
1834 throw new IOException("Failed to move " + tableName + " from "
1835 + tableDir + " to " + backupTableDir);
1837 } else {
1838 LOG.info("No previous " + tableName + " exists. Continuing.");
1843 * @return Path to backup of original directory
1845 Path sidelineOldMeta() throws IOException {
1846 // put current hbase:meta aside.
1847 Path hbaseDir = FSUtils.getRootDir(getConf());
1848 FileSystem fs = hbaseDir.getFileSystem(getConf());
1849 Path backupDir = getSidelineDir();
1850 fs.mkdirs(backupDir);
1852 try {
1853 sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1854 } catch (IOException e) {
1855 LOG.error(HBaseMarkers.FATAL, "... failed to sideline meta. Currently in "
1856 + "inconsistent state. To restore try to rename hbase:meta in " +
1857 backupDir.getName() + " to " + hbaseDir.getName() + ".", e);
1858 throw e; // throw original exception
1860 return backupDir;
1864 * Load the list of disabled tables in ZK into local set.
1865 * @throws ZooKeeperConnectionException
1866 * @throws IOException
1868 private void loadTableStates()
1869 throws IOException {
1870 tableStates = MetaTableAccessor.getTableStates(connection);
1871 // Add hbase:meta so this tool keeps working. In hbase2, meta is always enabled though it
1872 // has no entry in the table states. HBCK doesn't work right w/ hbase2 but just do this in
1873 // meantime.
1874 this.tableStates.put(TableName.META_TABLE_NAME,
1875 new TableState(TableName.META_TABLE_NAME, TableState.State.ENABLED));
1879 * Check if the specified region's table is disabled.
1880 * @param tableName table to check status of
1882 private boolean isTableDisabled(TableName tableName) {
1883 return tableStates.containsKey(tableName)
1884 && tableStates.get(tableName)
1885 .inStates(TableState.State.DISABLED, TableState.State.DISABLING);
1889 * Scan HDFS for all regions, recording their information into
1890 * regionInfoMap
1892 public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1893 Path rootDir = FSUtils.getRootDir(getConf());
1894 FileSystem fs = rootDir.getFileSystem(getConf());
1896 // list all tables from HDFS
1897 List<FileStatus> tableDirs = Lists.newArrayList();
1899 boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1901 List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1902 for (Path path : paths) {
1903 TableName tableName = FSUtils.getTableName(path);
1904 if ((!checkMetaOnly &&
1905 isTableIncluded(tableName)) ||
1906 tableName.equals(TableName.META_TABLE_NAME)) {
1907 tableDirs.add(fs.getFileStatus(path));
1911 // verify that version file exists
1912 if (!foundVersionFile) {
1913 errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1914 "Version file does not exist in root dir " + rootDir);
1915 if (shouldFixVersionFile()) {
1916 LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1917 + " file.");
1918 setShouldRerun();
1919 FSUtils.setVersion(fs, rootDir, getConf().getInt(
1920 HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1921 HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1922 HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1926 // Avoid multithreading at table-level because already multithreaded internally at
1927 // region-level. Additionally multithreading at table-level can lead to deadlock
1928 // if there are many tables in the cluster. Since there are a limited # of threads
1929 // in the executor's thread pool and if we multithread at the table-level by putting
1930 // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1931 // executor tied up solely in waiting for the tables' region-level calls to complete.
1932 // If there are enough tables then there will be no actual threads in the pool left
1933 // for the region-level callables to be serviced.
1934 for (FileStatus tableDir : tableDirs) {
1935 LOG.debug("Loading region dirs from " +tableDir.getPath());
1936 WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1937 try {
1938 item.call();
1939 } catch (ExecutionException e) {
1940 LOG.warn("Could not completely load table dir " +
1941 tableDir.getPath(), e.getCause());
1944 errors.print("");
1948 * Record the location of the hbase:meta region as found in ZooKeeper.
1950 private boolean recordMetaRegion() throws IOException {
1951 RegionLocations rl = connection.locateRegion(TableName.META_TABLE_NAME,
1952 HConstants.EMPTY_START_ROW, false, false);
1953 if (rl == null) {
1954 errors.reportError(ERROR_CODE.NULL_META_REGION,
1955 "META region was not found in ZooKeeper");
1956 return false;
1958 for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1959 // Check if Meta region is valid and existing
1960 if (metaLocation == null ) {
1961 errors.reportError(ERROR_CODE.NULL_META_REGION,
1962 "META region location is null");
1963 return false;
1965 if (metaLocation.getRegionInfo() == null) {
1966 errors.reportError(ERROR_CODE.NULL_META_REGION,
1967 "META location regionInfo is null");
1968 return false;
1970 if (metaLocation.getHostname() == null) {
1971 errors.reportError(ERROR_CODE.NULL_META_REGION,
1972 "META location hostName is null");
1973 return false;
1975 ServerName sn = metaLocation.getServerName();
1976 MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());
1977 HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1978 if (hbckInfo == null) {
1979 regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1980 } else {
1981 hbckInfo.metaEntry = m;
1984 return true;
1987 private ZKWatcher createZooKeeperWatcher() throws IOException {
1988 return new ZKWatcher(getConf(), "hbase Fsck", new Abortable() {
1989 @Override
1990 public void abort(String why, Throwable e) {
1991 LOG.error(why, e);
1992 System.exit(1);
1995 @Override
1996 public boolean isAborted() {
1997 return false;
2004 * Contacts each regionserver and fetches metadata about regions.
2005 * @param regionServerList - the list of region servers to connect to
2006 * @throws IOException if a remote or network exception occurs
2008 void processRegionServers(Collection<ServerName> regionServerList)
2009 throws IOException, InterruptedException {
2011 List<WorkItemRegion> workItems = new ArrayList<>(regionServerList.size());
2012 List<Future<Void>> workFutures;
2014 // loop to contact each region server in parallel
2015 for (ServerName rsinfo: regionServerList) {
2016 workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
2019 workFutures = executor.invokeAll(workItems);
2021 for(int i=0; i<workFutures.size(); i++) {
2022 WorkItemRegion item = workItems.get(i);
2023 Future<Void> f = workFutures.get(i);
2024 try {
2025 f.get();
2026 } catch(ExecutionException e) {
2027 LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
2028 e.getCause());
2034 * Check consistency of all regions that have been found in previous phases.
2036 private void checkAndFixConsistency()
2037 throws IOException, KeeperException, InterruptedException {
2038 // Divide the checks in two phases. One for default/primary replicas and another
2039 // for the non-primary ones. Keeps code cleaner this way.
2041 List<CheckRegionConsistencyWorkItem> workItems = new ArrayList<>(regionInfoMap.size());
2042 for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
2043 if (e.getValue().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2044 workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
2047 checkRegionConsistencyConcurrently(workItems);
2049 boolean prevHdfsCheck = shouldCheckHdfs();
2050 setCheckHdfs(false); //replicas don't have any hdfs data
2051 // Run a pass over the replicas and fix any assignment issues that exist on the currently
2052 // deployed/undeployed replicas.
2053 List<CheckRegionConsistencyWorkItem> replicaWorkItems = new ArrayList<>(regionInfoMap.size());
2054 for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
2055 if (e.getValue().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2056 replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
2059 checkRegionConsistencyConcurrently(replicaWorkItems);
2060 setCheckHdfs(prevHdfsCheck);
2062 // If some regions is skipped during checkRegionConsistencyConcurrently() phase, we might
2063 // not get accurate state of the hbase if continuing. The config here allows users to tune
2064 // the tolerance of number of skipped region.
2065 // TODO: evaluate the consequence to continue the hbck operation without config.
2066 int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
2067 int numOfSkippedRegions = skippedRegions.size();
2068 if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
2069 throw new IOException(numOfSkippedRegions
2070 + " region(s) could not be checked or repaired. See logs for detail.");
2073 if (shouldCheckHdfs()) {
2074 checkAndFixTableStates();
2079 * Check consistency of all regions using mulitple threads concurrently.
2081 private void checkRegionConsistencyConcurrently(
2082 final List<CheckRegionConsistencyWorkItem> workItems)
2083 throws IOException, KeeperException, InterruptedException {
2084 if (workItems.isEmpty()) {
2085 return; // nothing to check
2088 List<Future<Void>> workFutures = executor.invokeAll(workItems);
2089 for(Future<Void> f: workFutures) {
2090 try {
2091 f.get();
2092 } catch(ExecutionException e1) {
2093 LOG.warn("Could not check region consistency " , e1.getCause());
2094 if (e1.getCause() instanceof IOException) {
2095 throw (IOException)e1.getCause();
2096 } else if (e1.getCause() instanceof KeeperException) {
2097 throw (KeeperException)e1.getCause();
2098 } else if (e1.getCause() instanceof InterruptedException) {
2099 throw (InterruptedException)e1.getCause();
2100 } else {
2101 throw new IOException(e1.getCause());
2107 class CheckRegionConsistencyWorkItem implements Callable<Void> {
2108 private final String key;
2109 private final HbckInfo hbi;
2111 CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
2112 this.key = key;
2113 this.hbi = hbi;
2116 @Override
2117 public synchronized Void call() throws Exception {
2118 try {
2119 checkRegionConsistency(key, hbi);
2120 } catch (Exception e) {
2121 // If the region is non-META region, skip this region and send warning/error message; if
2122 // the region is META region, we should not continue.
2123 LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
2124 + "'.", e);
2125 if (hbi.getHdfsHRI().isMetaRegion()) {
2126 throw e;
2128 LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
2129 addSkippedRegion(hbi);
2131 return null;
2135 private void addSkippedRegion(final HbckInfo hbi) {
2136 Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
2137 if (skippedRegionNames == null) {
2138 skippedRegionNames = new HashSet<>();
2140 skippedRegionNames.add(hbi.getRegionNameAsString());
2141 skippedRegions.put(hbi.getTableName(), skippedRegionNames);
2145 * Check and fix table states, assumes full info available:
2146 * - tableInfos
2147 * - empty tables loaded
2149 private void checkAndFixTableStates() throws IOException {
2150 // first check dangling states
2151 for (Entry<TableName, TableState> entry : tableStates.entrySet()) {
2152 TableName tableName = entry.getKey();
2153 TableState tableState = entry.getValue();
2154 TableInfo tableInfo = tablesInfo.get(tableName);
2155 if (isTableIncluded(tableName)
2156 && !tableName.isSystemTable()
2157 && tableInfo == null) {
2158 if (fixMeta) {
2159 MetaTableAccessor.deleteTableState(connection, tableName);
2160 TableState state = MetaTableAccessor.getTableState(connection, tableName);
2161 if (state != null) {
2162 errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
2163 tableName + " unable to delete dangling table state " + tableState);
2165 } else if (!checkMetaOnly) {
2166 // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
2167 // true, tableInfo will be null as tablesInfo are not polulated for all tables from hdfs
2168 errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
2169 tableName + " has dangling table state " + tableState);
2173 // check that all tables have states
2174 for (TableName tableName : tablesInfo.keySet()) {
2175 if (isTableIncluded(tableName) && !tableStates.containsKey(tableName)) {
2176 if (fixMeta) {
2177 MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
2178 TableState newState = MetaTableAccessor.getTableState(connection, tableName);
2179 if (newState == null) {
2180 errors.reportError(ERROR_CODE.NO_TABLE_STATE,
2181 "Unable to change state for table " + tableName + " in meta ");
2183 } else {
2184 errors.reportError(ERROR_CODE.NO_TABLE_STATE,
2185 tableName + " has no state in meta ");
2191 private void preCheckPermission() throws IOException, AccessDeniedException {
2192 if (shouldIgnorePreCheckPermission()) {
2193 return;
2196 Path hbaseDir = FSUtils.getRootDir(getConf());
2197 FileSystem fs = hbaseDir.getFileSystem(getConf());
2198 UserProvider userProvider = UserProvider.instantiate(getConf());
2199 UserGroupInformation ugi = userProvider.getCurrent().getUGI();
2200 FileStatus[] files = fs.listStatus(hbaseDir);
2201 for (FileStatus file : files) {
2202 try {
2203 FSUtils.checkAccess(ugi, file, FsAction.WRITE);
2204 } catch (AccessDeniedException ace) {
2205 LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
2206 errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
2207 + " does not have write perms to " + file.getPath()
2208 + ". Please rerun hbck as hdfs user " + file.getOwner());
2209 throw ace;
2215 * Deletes region from meta table
2217 private void deleteMetaRegion(HbckInfo hi) throws IOException {
2218 deleteMetaRegion(hi.metaEntry.getRegionName());
2222 * Deletes region from meta table
2224 private void deleteMetaRegion(byte[] metaKey) throws IOException {
2225 Delete d = new Delete(metaKey);
2226 meta.delete(d);
2227 LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
2231 * Reset the split parent region info in meta table
2233 private void resetSplitParent(HbckInfo hi) throws IOException {
2234 RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
2235 Delete d = new Delete(hi.metaEntry.getRegionName());
2236 d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
2237 d.addColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
2238 mutations.add(d);
2240 RegionInfo hri = RegionInfoBuilder.newBuilder(hi.metaEntry)
2241 .setOffline(false)
2242 .setSplit(false)
2243 .build();
2244 Put p = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime());
2245 mutations.add(p);
2247 meta.mutateRow(mutations);
2248 LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
2252 * This backwards-compatibility wrapper for permanently offlining a region
2253 * that should not be alive. If the region server does not support the
2254 * "offline" method, it will use the closest unassign method instead. This
2255 * will basically work until one attempts to disable or delete the affected
2256 * table. The problem has to do with in-memory only master state, so
2257 * restarting the HMaster or failing over to another should fix this.
2259 private void offline(byte[] regionName) throws IOException {
2260 String regionString = Bytes.toStringBinary(regionName);
2261 if (!rsSupportsOffline) {
2262 LOG.warn("Using unassign region " + regionString
2263 + " instead of using offline method, you should"
2264 + " restart HMaster after these repairs");
2265 admin.unassign(regionName, true);
2266 return;
2269 // first time we assume the rs's supports #offline.
2270 try {
2271 LOG.info("Offlining region " + regionString);
2272 admin.offline(regionName);
2273 } catch (IOException ioe) {
2274 String notFoundMsg = "java.lang.NoSuchMethodException: " +
2275 "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2276 if (ioe.getMessage().contains(notFoundMsg)) {
2277 LOG.warn("Using unassign region " + regionString
2278 + " instead of using offline method, you should"
2279 + " restart HMaster after these repairs");
2280 rsSupportsOffline = false; // in the future just use unassign
2281 admin.unassign(regionName, true);
2282 return;
2284 throw ioe;
2288 private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2289 undeployRegionsForHbi(hi);
2290 // undeploy replicas of the region (but only if the method is invoked for the primary)
2291 if (hi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2292 return;
2294 int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2295 for (int i = 1; i < numReplicas; i++) {
2296 if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2297 RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2298 hi.getPrimaryHRIForDeployedReplica(), i);
2299 HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2300 if (h != null) {
2301 undeployRegionsForHbi(h);
2302 //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2303 //in consistency checks
2304 h.setSkipChecks(true);
2309 private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2310 for (OnlineEntry rse : hi.deployedEntries) {
2311 LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa);
2312 try {
2313 HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2314 offline(rse.hri.getRegionName());
2315 } catch (IOException ioe) {
2316 LOG.warn("Got exception when attempting to offline region "
2317 + Bytes.toString(rse.hri.getRegionName()), ioe);
2323 * Attempts to undeploy a region from a region server based in information in
2324 * META. Any operations that modify the file system should make sure that
2325 * its corresponding region is not deployed to prevent data races.
2327 * A separate call is required to update the master in-memory region state
2328 * kept in the AssignementManager. Because disable uses this state instead of
2329 * that found in META, we can't seem to cleanly disable/delete tables that
2330 * have been hbck fixed. When used on a version of HBase that does not have
2331 * the offline ipc call exposed on the master (&lt;0.90.5, &lt;0.92.0) a master
2332 * restart or failover may be required.
2334 private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2335 if (hi.metaEntry == null && hi.hdfsEntry == null) {
2336 undeployRegions(hi);
2337 return;
2340 // get assignment info and hregioninfo from meta.
2341 Get get = new Get(hi.getRegionName());
2342 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2343 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2344 get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2345 // also get the locations of the replicas to close if the primary region is being closed
2346 if (hi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2347 int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2348 for (int i = 0; i < numReplicas; i++) {
2349 get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2350 get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2353 Result r = meta.get(get);
2354 RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2355 if (rl == null) {
2356 LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2357 " since meta does not have handle to reach it");
2358 return;
2360 for (HRegionLocation h : rl.getRegionLocations()) {
2361 ServerName serverName = h.getServerName();
2362 if (serverName == null) {
2363 errors.reportError("Unable to close region "
2364 + hi.getRegionNameAsString() + " because meta does not "
2365 + "have handle to reach it.");
2366 continue;
2368 RegionInfo hri = h.getRegionInfo();
2369 if (hri == null) {
2370 LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2371 + " because hbase:meta had invalid or missing "
2372 + HConstants.CATALOG_FAMILY_STR + ":"
2373 + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2374 + " qualifier value.");
2375 continue;
2377 // close the region -- close files and remove assignment
2378 HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2382 private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2383 KeeperException, InterruptedException {
2384 // If we are trying to fix the errors
2385 if (shouldFixAssignments()) {
2386 errors.print(msg);
2387 undeployRegions(hbi);
2388 setShouldRerun();
2389 RegionInfo hri = hbi.getHdfsHRI();
2390 if (hri == null) {
2391 hri = hbi.metaEntry;
2393 HBaseFsckRepair.fixUnassigned(admin, hri);
2394 HBaseFsckRepair.waitUntilAssigned(admin, hri);
2396 // also assign replicas if needed (do it only when this call operates on a primary replica)
2397 if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) return;
2398 int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2399 for (int i = 1; i < replicationCount; i++) {
2400 hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2401 HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2402 if (h != null) {
2403 undeployRegions(h);
2404 //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2405 //in consistency checks
2406 h.setSkipChecks(true);
2408 HBaseFsckRepair.fixUnassigned(admin, hri);
2409 HBaseFsckRepair.waitUntilAssigned(admin, hri);
2416 * Check a single region for consistency and correct deployment.
2418 private void checkRegionConsistency(final String key, final HbckInfo hbi)
2419 throws IOException, KeeperException, InterruptedException {
2421 if (hbi.isSkipChecks()) return;
2422 String descriptiveName = hbi.toString();
2423 boolean inMeta = hbi.metaEntry != null;
2424 // In case not checking HDFS, assume the region is on HDFS
2425 boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2426 boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2427 boolean isDeployed = !hbi.deployedOn.isEmpty();
2428 boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2429 boolean deploymentMatchesMeta =
2430 hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2431 hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2432 boolean splitParent =
2433 inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2434 boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable());
2435 boolean recentlyModified = inHdfs &&
2436 hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2438 // ========== First the healthy cases =============
2439 if (hbi.containsOnlyHdfsEdits()) {
2440 return;
2442 if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2443 return;
2444 } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2445 LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2446 "tabled that is not deployed");
2447 return;
2448 } else if (recentlyModified) {
2449 LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2450 return;
2452 // ========== Cases where the region is not in hbase:meta =============
2453 else if (!inMeta && !inHdfs && !isDeployed) {
2454 // We shouldn't have record of this region at all then!
2455 assert false : "Entry for region with no data";
2456 } else if (!inMeta && !inHdfs && isDeployed) {
2457 errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2458 + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2459 "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2460 if (shouldFixAssignments()) {
2461 undeployRegions(hbi);
2464 } else if (!inMeta && inHdfs && !isDeployed) {
2465 if (hbi.isMerged()) {
2466 // This region has already been merged, the remaining hdfs file will be
2467 // cleaned by CatalogJanitor later
2468 hbi.setSkipChecks(true);
2469 LOG.info("Region " + descriptiveName
2470 + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2471 return;
2473 errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2474 + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2475 "or deployed on any region server");
2476 // restore region consistency of an adopted orphan
2477 if (shouldFixMeta()) {
2478 if (!hbi.isHdfsRegioninfoPresent()) {
2479 LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2480 + " in table integrity repair phase if -fixHdfsOrphans was" +
2481 " used.");
2482 return;
2485 RegionInfo hri = hbi.getHdfsHRI();
2486 TableInfo tableInfo = tablesInfo.get(hri.getTable());
2488 for (RegionInfo region : tableInfo.getRegionsFromMeta()) {
2489 if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2490 && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2491 hri.getEndKey()) >= 0)
2492 && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2493 if(region.isSplit() || region.isOffline()) continue;
2494 Path regionDir = hbi.getHdfsRegionDir();
2495 FileSystem fs = regionDir.getFileSystem(getConf());
2496 List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2497 for (Path familyDir : familyDirs) {
2498 List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2499 for (Path referenceFilePath : referenceFilePaths) {
2500 Path parentRegionDir =
2501 StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2502 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2503 LOG.warn(hri + " start and stop keys are in the range of " + region
2504 + ". The region might not be cleaned up from hdfs when region " + region
2505 + " split failed. Hence deleting from hdfs.");
2506 HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2507 regionDir.getParent(), hri);
2508 return;
2514 LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2515 int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2516 HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2517 admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2518 .getLiveServerMetrics().keySet(), numReplicas);
2520 tryAssignmentRepair(hbi, "Trying to reassign region...");
2523 } else if (!inMeta && inHdfs && isDeployed) {
2524 errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2525 + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2526 debugLsr(hbi.getHdfsRegionDir());
2527 if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2528 // for replicas, this means that we should undeploy the region (we would have
2529 // gone over the primaries and fixed meta holes in first phase under
2530 // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2531 // this stage unless unwanted replica)
2532 if (shouldFixAssignments()) {
2533 undeployRegionsForHbi(hbi);
2536 if (shouldFixMeta() && hbi.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
2537 if (!hbi.isHdfsRegioninfoPresent()) {
2538 LOG.error("This should have been repaired in table integrity repair phase");
2539 return;
2542 LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
2543 int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2544 HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2545 admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
2546 .getLiveServerMetrics().keySet(), numReplicas);
2547 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2550 // ========== Cases where the region is in hbase:meta =============
2551 } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2552 // check whether this is an actual error, or just transient state where parent
2553 // is not cleaned
2554 if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2555 // check that split daughters are there
2556 HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2557 HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2558 if (infoA != null && infoB != null) {
2559 // we already processed or will process daughters. Move on, nothing to see here.
2560 hbi.setSkipChecks(true);
2561 return;
2565 // For Replica region, we need to do a similar check. If replica is not split successfully,
2566 // error is going to be reported against primary daughter region.
2567 if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
2568 LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2569 + "and not deployed on any region server. This may be transient.");
2570 hbi.setSkipChecks(true);
2571 return;
2574 errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2575 + descriptiveName + " is a split parent in META, in HDFS, "
2576 + "and not deployed on any region server. This could be transient, "
2577 + "consider to run the catalog janitor first!");
2578 if (shouldFixSplitParents()) {
2579 setShouldRerun();
2580 resetSplitParent(hbi);
2582 } else if (inMeta && !inHdfs && !isDeployed) {
2583 errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2584 + descriptiveName + " found in META, but not in HDFS "
2585 + "or deployed on any region server.");
2586 if (shouldFixMeta()) {
2587 deleteMetaRegion(hbi);
2589 } else if (inMeta && !inHdfs && isDeployed) {
2590 errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2591 + " found in META, but not in HDFS, " +
2592 "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2593 // We treat HDFS as ground truth. Any information in meta is transient
2594 // and equivalent data can be regenerated. So, lets unassign and remove
2595 // these problems from META.
2596 if (shouldFixAssignments()) {
2597 errors.print("Trying to fix unassigned region...");
2598 undeployRegions(hbi);
2600 if (shouldFixMeta()) {
2601 // wait for it to complete
2602 deleteMetaRegion(hbi);
2604 } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2605 errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2606 + " not deployed on any region server.");
2607 tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2608 } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2609 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2610 "Region " + descriptiveName + " should not be deployed according " +
2611 "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2612 if (shouldFixAssignments()) {
2613 errors.print("Trying to close the region " + descriptiveName);
2614 setShouldRerun();
2615 HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2617 } else if (inMeta && inHdfs && isMultiplyDeployed) {
2618 errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2619 + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2620 + " but is multiply assigned to region servers " +
2621 Joiner.on(", ").join(hbi.deployedOn));
2622 // If we are trying to fix the errors
2623 if (shouldFixAssignments()) {
2624 errors.print("Trying to fix assignment error...");
2625 setShouldRerun();
2626 HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2628 } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2629 errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2630 + descriptiveName + " listed in hbase:meta on region server " +
2631 hbi.metaEntry.regionServer + " but found on region server " +
2632 hbi.deployedOn.get(0));
2633 // If we are trying to fix the errors
2634 if (shouldFixAssignments()) {
2635 errors.print("Trying to fix assignment error...");
2636 setShouldRerun();
2637 HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2638 HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2640 } else {
2641 errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2642 " is in an unforeseen state:" +
2643 " inMeta=" + inMeta +
2644 " inHdfs=" + inHdfs +
2645 " isDeployed=" + isDeployed +
2646 " isMultiplyDeployed=" + isMultiplyDeployed +
2647 " deploymentMatchesMeta=" + deploymentMatchesMeta +
2648 " shouldBeDeployed=" + shouldBeDeployed);
2653 * Checks tables integrity. Goes over all regions and scans the tables.
2654 * Collects all the pieces for each table and checks if there are missing,
2655 * repeated or overlapping ones.
2656 * @throws IOException
2658 SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2659 tablesInfo = new TreeMap<>();
2660 LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2661 for (HbckInfo hbi : regionInfoMap.values()) {
2662 // Check only valid, working regions
2663 if (hbi.metaEntry == null) {
2664 // this assumes that consistency check has run loadMetaEntry
2665 Path p = hbi.getHdfsRegionDir();
2666 if (p == null) {
2667 errors.report("No regioninfo in Meta or HDFS. " + hbi);
2670 // TODO test.
2671 continue;
2673 if (hbi.metaEntry.regionServer == null) {
2674 errors.detail("Skipping region because no region server: " + hbi);
2675 continue;
2677 if (hbi.metaEntry.isOffline()) {
2678 errors.detail("Skipping region because it is offline: " + hbi);
2679 continue;
2681 if (hbi.containsOnlyHdfsEdits()) {
2682 errors.detail("Skipping region because it only contains edits" + hbi);
2683 continue;
2686 // Missing regionDir or over-deployment is checked elsewhere. Include
2687 // these cases in modTInfo, so we can evaluate those regions as part of
2688 // the region chain in META
2689 //if (hbi.foundRegionDir == null) continue;
2690 //if (hbi.deployedOn.size() != 1) continue;
2691 if (hbi.deployedOn.isEmpty()) continue;
2693 // We should be safe here
2694 TableName tableName = hbi.metaEntry.getTable();
2695 TableInfo modTInfo = tablesInfo.get(tableName);
2696 if (modTInfo == null) {
2697 modTInfo = new TableInfo(tableName);
2699 for (ServerName server : hbi.deployedOn) {
2700 modTInfo.addServer(server);
2703 if (!hbi.isSkipChecks()) {
2704 modTInfo.addRegionInfo(hbi);
2707 tablesInfo.put(tableName, modTInfo);
2710 loadTableInfosForTablesWithNoRegion();
2712 logParallelMerge();
2713 for (TableInfo tInfo : tablesInfo.values()) {
2714 TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2715 if (!tInfo.checkRegionChain(handler)) {
2716 errors.report("Found inconsistency in table " + tInfo.getName());
2719 return tablesInfo;
2722 /** Loads table info's for tables that may not have been included, since there are no
2723 * regions reported for the table, but table dir is there in hdfs
2725 private void loadTableInfosForTablesWithNoRegion() throws IOException {
2726 Map<String, TableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2727 for (TableDescriptor htd : allTables.values()) {
2728 if (checkMetaOnly && !htd.isMetaTable()) {
2729 continue;
2732 TableName tableName = htd.getTableName();
2733 if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2734 TableInfo tableInfo = new TableInfo(tableName);
2735 tableInfo.htds.add(htd);
2736 tablesInfo.put(htd.getTableName(), tableInfo);
2742 * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2743 * @return number of file move fixes done to merge regions.
2745 public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2746 int fileMoves = 0;
2747 String thread = Thread.currentThread().getName();
2748 LOG.debug("[" + thread + "] Contained region dir after close and pause");
2749 debugLsr(contained.getHdfsRegionDir());
2751 // rename the contained into the container.
2752 FileSystem fs = targetRegionDir.getFileSystem(getConf());
2753 FileStatus[] dirs = null;
2754 try {
2755 dirs = fs.listStatus(contained.getHdfsRegionDir());
2756 } catch (FileNotFoundException fnfe) {
2757 // region we are attempting to merge in is not present! Since this is a merge, there is
2758 // no harm skipping this region if it does not exist.
2759 if (!fs.exists(contained.getHdfsRegionDir())) {
2760 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2761 + " is missing. Assuming already sidelined or moved.");
2762 } else {
2763 sidelineRegionDir(fs, contained);
2765 return fileMoves;
2768 if (dirs == null) {
2769 if (!fs.exists(contained.getHdfsRegionDir())) {
2770 LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2771 + " already sidelined.");
2772 } else {
2773 sidelineRegionDir(fs, contained);
2775 return fileMoves;
2778 for (FileStatus cf : dirs) {
2779 Path src = cf.getPath();
2780 Path dst = new Path(targetRegionDir, src.getName());
2782 if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2783 // do not copy the old .regioninfo file.
2784 continue;
2787 if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2788 // do not copy the .oldlogs files
2789 continue;
2792 LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2793 // FileSystem.rename is inconsistent with directories -- if the
2794 // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2795 // it moves the src into the dst dir resulting in (foo/a/b). If
2796 // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2797 for (FileStatus hfile : fs.listStatus(src)) {
2798 boolean success = fs.rename(hfile.getPath(), dst);
2799 if (success) {
2800 fileMoves++;
2803 LOG.debug("[" + thread + "] Sideline directory contents:");
2804 debugLsr(targetRegionDir);
2807 // if all success.
2808 sidelineRegionDir(fs, contained);
2809 LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2810 getSidelineDir());
2811 debugLsr(contained.getHdfsRegionDir());
2813 return fileMoves;
2817 static class WorkItemOverlapMerge implements Callable<Void> {
2818 private TableIntegrityErrorHandler handler;
2819 Collection<HbckInfo> overlapgroup;
2821 WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2822 this.handler = handler;
2823 this.overlapgroup = overlapgroup;
2826 @Override
2827 public Void call() throws Exception {
2828 handler.handleOverlapGroup(overlapgroup);
2829 return null;
2834 * Maintain information about a particular table.
2836 public class TableInfo {
2837 TableName tableName;
2838 TreeSet <ServerName> deployedOn;
2840 // backwards regions
2841 final List<HbckInfo> backwards = new ArrayList<>();
2843 // sidelined big overlapped regions
2844 final Map<Path, HbckInfo> sidelinedRegions = new HashMap<>();
2846 // region split calculator
2847 final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<>(cmp);
2849 // Histogram of different TableDescriptors found. Ideally there is only one!
2850 final Set<TableDescriptor> htds = new HashSet<>();
2852 // key = start split, values = set of splits in problem group
2853 final Multimap<byte[], HbckInfo> overlapGroups =
2854 TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2856 // list of regions derived from meta entries.
2857 private ImmutableList<RegionInfo> regionsFromMeta = null;
/**
 * Creates the record for one table with an empty set of hosting servers.
 *
 * @param name name of the table this record describes
 */
TableInfo(TableName name) {
  this.deployedOn = new TreeSet<>();
  this.tableName = name;
}
2865 * @return descriptor common to all regions. null if are none or multiple!
2867 private TableDescriptor getHTD() {
2868 if (htds.size() == 1) {
2869 return (TableDescriptor)htds.toArray()[0];
2870 } else {
2871 LOG.error("None/Multiple table descriptors found for table '"
2872 + tableName + "' regions: " + htds);
2874 return null;
2877 public void addRegionInfo(HbckInfo hir) {
2878 if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2879 // end key is absolute end key, just add it.
2880 // ignore replicas other than primary for these checks
2881 if (hir.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2882 return;
2885 // if not the absolute end key, check for cycle
2886 if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2887 errors.reportError(
2888 ERROR_CODE.REGION_CYCLE,
2889 String.format("The endkey for this region comes before the "
2890 + "startkey, startkey=%s, endkey=%s",
2891 Bytes.toStringBinary(hir.getStartKey()),
2892 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2893 backwards.add(hir);
2894 return;
2897 // main case, add to split calculator
2898 // ignore replicas other than primary for these checks
2899 if (hir.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2902 public void addServer(ServerName server) {
2903 this.deployedOn.add(server);
2906 public TableName getName() {
2907 return tableName;
2910 public int getNumRegions() {
2911 return sc.getStarts().size() + backwards.size();
2914 public synchronized ImmutableList<RegionInfo> getRegionsFromMeta() {
2915 // lazy loaded, synchronized to ensure a single load
2916 if (regionsFromMeta == null) {
2917 List<RegionInfo> regions = new ArrayList<>();
2918 for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2919 if (tableName.equals(h.getTableName())) {
2920 if (h.metaEntry != null) {
2921 regions.add(h.metaEntry);
2925 regionsFromMeta = Ordering.from(RegionInfo.COMPARATOR).immutableSortedCopy(regions);
2928 return regionsFromMeta;
2931 private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2932 ErrorReporter errors;
2934 IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2935 this.errors = errors;
2936 setTableInfo(ti);
2939 @Override
2940 public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2941 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2942 "First region should start with an empty key. You need to "
2943 + " create a new region and regioninfo in HDFS to plug the hole.",
2944 getTableInfo(), hi);
2947 @Override
2948 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2949 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2950 "Last region should end with an empty key. You need to "
2951 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2954 @Override
2955 public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2956 errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2957 "Region has the same start and end key.", getTableInfo(), hi);
2960 @Override
2961 public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2962 byte[] key = r1.getStartKey();
2963 // dup start key
2964 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2965 "Multiple regions have the same startkey: "
2966 + Bytes.toStringBinary(key), getTableInfo(), r1);
2967 errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2968 "Multiple regions have the same startkey: "
2969 + Bytes.toStringBinary(key), getTableInfo(), r2);
2972 @Override
2973 public void handleSplit(HbckInfo r1, HbckInfo r2) throws IOException{
2974 byte[] key = r1.getStartKey();
2975 // dup start key
2976 errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
2977 "Multiple regions have the same regionID: "
2978 + Bytes.toStringBinary(key), getTableInfo(), r1);
2979 errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
2980 "Multiple regions have the same regionID: "
2981 + Bytes.toStringBinary(key), getTableInfo(), r2);
2984 @Override
2985 public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2986 errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2987 "There is an overlap in the region chain.",
2988 getTableInfo(), hi1, hi2);
2991 @Override
2992 public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2993 errors.reportError(
2994 ERROR_CODE.HOLE_IN_REGION_CHAIN,
2995 "There is a hole in the region chain between "
2996 + Bytes.toStringBinary(holeStart) + " and "
2997 + Bytes.toStringBinary(holeStop)
2998 + ". You need to create a new .regioninfo and region "
2999 + "dir in hdfs to plug the hole.");
3004 * This handler fixes integrity errors from hdfs information. There are
3005 * basically three classes of integrity problems 1) holes, 2) overlaps, and
3006 * 3) invalid regions.
3008 * This class overrides methods that fix holes and the overlap group case.
3009 * Individual cases of particular overlaps are handled by the general
3010 * overlap group merge repair case.
3012 * If hbase is online, this forces regions offline before doing merge
3013 * operations.
3015 private class HDFSIntegrityFixer extends IntegrityFixSuggester {
3016 Configuration conf;
3018 boolean fixOverlaps = true;
/**
 * @param ti table whose integrity problems are fixed
 * @param errors reporter that receives every problem
 * @param conf configuration stored for the repair calls made by this fixer
 * @param fixHoles currently not used by this constructor
 * @param fixOverlaps whether overlap groups should be repaired
 */
HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
    boolean fixHoles, boolean fixOverlaps) {
  super(ti, errors);
  this.fixOverlaps = fixOverlaps;
  this.conf = conf;
  // TODO properly use fixHoles
}
3029 * This is a special case hole -- when the first region of a table is
3030 * missing from META, HBase doesn't acknowledge the existance of the
3031 * table.
3033 @Override
3034 public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
3035 errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
3036 "First region should start with an empty key. Creating a new " +
3037 "region and regioninfo in HDFS to plug the hole.",
3038 getTableInfo(), next);
3039 TableDescriptor htd = getTableInfo().getHTD();
3040 // from special EMPTY_START_ROW to next region's startKey
3041 RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3042 .setStartKey(HConstants.EMPTY_START_ROW)
3043 .setEndKey(next.getStartKey())
3044 .build();
3046 // TODO test
3047 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3048 LOG.info("Table region start key was not empty. Created new empty region: "
3049 + newRegion + " " +region);
3050 fixes++;
3053 @Override
3054 public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
3055 errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
3056 "Last region should end with an empty key. Creating a new "
3057 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
3058 TableDescriptor htd = getTableInfo().getHTD();
3059 // from curEndKey to EMPTY_START_ROW
3060 RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3061 .setStartKey(curEndKey)
3062 .setEndKey(HConstants.EMPTY_START_ROW)
3063 .build();
3065 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3066 LOG.info("Table region end key was not empty. Created new empty region: " + newRegion
3067 + " " + region);
3068 fixes++;
3072 * There is a hole in the hdfs regions that violates the table integrity
3073 * rules. Create a new empty region that patches the hole.
3075 @Override
3076 public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
3077 errors.reportError(
3078 ERROR_CODE.HOLE_IN_REGION_CHAIN,
3079 "There is a hole in the region chain between "
3080 + Bytes.toStringBinary(holeStartKey) + " and "
3081 + Bytes.toStringBinary(holeStopKey)
3082 + ". Creating a new regioninfo and region "
3083 + "dir in hdfs to plug the hole.");
3084 TableDescriptor htd = getTableInfo().getHTD();
3085 RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3086 .setStartKey(holeStartKey)
3087 .setEndKey(holeStopKey)
3088 .build();
3089 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3090 LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
3091 fixes++;
3095 * This takes set of overlapping regions and merges them into a single
3096 * region. This covers cases like degenerate regions, shared start key,
3097 * general overlaps, duplicate ranges, and partial overlapping regions.
3099 * Cases:
3100 * - Clean regions that overlap
3101 * - Only .oldlogs regions (can't find start/stop range, or figure out)
3103 * This is basically threadsafe, except for the fixer increment in mergeOverlaps.
3105 @Override
3106 public void handleOverlapGroup(Collection<HbckInfo> overlap)
3107 throws IOException {
3108 Preconditions.checkNotNull(overlap);
3109 Preconditions.checkArgument(overlap.size() >0);
3111 if (!this.fixOverlaps) {
3112 LOG.warn("Not attempting to repair overlaps.");
3113 return;
3116 if (overlap.size() > maxMerge) {
3117 LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
3118 "regions which is greater than " + maxMerge + ", the max number of regions to merge");
3119 if (sidelineBigOverlaps) {
3120 // we only sideline big overlapped groups that exceeds the max number of regions to merge
3121 sidelineBigOverlaps(overlap);
3123 return;
3125 if (shouldRemoveParents()) {
3126 removeParentsAndFixSplits(overlap);
3128 mergeOverlaps(overlap);
3131 void removeParentsAndFixSplits(Collection<HbckInfo> overlap) throws IOException {
3132 Pair<byte[], byte[]> range = null;
3133 HbckInfo parent = null;
3134 HbckInfo daughterA = null;
3135 HbckInfo daughterB = null;
3136 Collection<HbckInfo> daughters = new ArrayList<HbckInfo>(overlap);
3138 String thread = Thread.currentThread().getName();
3139 LOG.info("== [" + thread + "] Attempting fix splits in overlap state.");
3141 // we only can handle a single split per group at the time
3142 if (overlap.size() > 3) {
3143 LOG.info("Too many overlaps were found on this group, falling back to regular merge.");
3144 return;
3147 for (HbckInfo hi : overlap) {
3148 if (range == null) {
3149 range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
3150 } else {
3151 if (RegionSplitCalculator.BYTES_COMPARATOR
3152 .compare(hi.getStartKey(), range.getFirst()) < 0) {
3153 range.setFirst(hi.getStartKey());
3155 if (RegionSplitCalculator.BYTES_COMPARATOR
3156 .compare(hi.getEndKey(), range.getSecond()) > 0) {
3157 range.setSecond(hi.getEndKey());
3162 LOG.info("This group range is [" + Bytes.toStringBinary(range.getFirst()) + ", "
3163 + Bytes.toStringBinary(range.getSecond()) + "]");
3165 // attempt to find a possible parent for the edge case of a split
3166 for (HbckInfo hi : overlap) {
3167 if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0
3168 && Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
3169 LOG.info("This is a parent for this group: " + hi.toString());
3170 parent = hi;
3174 // Remove parent regions from daughters collection
3175 if (parent != null) {
3176 daughters.remove(parent);
3179 // Lets verify that daughters share the regionID at split time and they
3180 // were created after the parent
3181 for (HbckInfo hi : daughters) {
3182 if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0) {
3183 if (parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
3184 daughterA = hi;
3187 if (Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
3188 if (parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
3189 daughterB = hi;
3194 // daughters must share the same regionID and we should have a parent too
3195 if (daughterA.getHdfsHRI().getRegionId() != daughterB.getHdfsHRI().getRegionId() || parent == null)
3196 return;
3198 FileSystem fs = FileSystem.get(conf);
3199 LOG.info("Found parent: " + parent.getRegionNameAsString());
3200 LOG.info("Found potential daughter a: " + daughterA.getRegionNameAsString());
3201 LOG.info("Found potential daughter b: " + daughterB.getRegionNameAsString());
3202 LOG.info("Trying to fix parent in overlap by removing the parent.");
3203 try {
3204 closeRegion(parent);
3205 } catch (IOException ioe) {
3206 LOG.warn("Parent region could not be closed, continuing with regular merge...", ioe);
3207 return;
3208 } catch (InterruptedException ie) {
3209 LOG.warn("Parent region could not be closed, continuing with regular merge...", ie);
3210 return;
3213 try {
3214 offline(parent.getRegionName());
3215 } catch (IOException ioe) {
3216 LOG.warn("Unable to offline parent region: " + parent.getRegionNameAsString()
3217 + ". Just continuing with regular merge... ", ioe);
3218 return;
3221 try {
3222 HBaseFsckRepair.removeParentInMeta(conf, parent.getHdfsHRI());
3223 } catch (IOException ioe) {
3224 LOG.warn("Unable to remove parent region in META: " + parent.getRegionNameAsString()
3225 + ". Just continuing with regular merge... ", ioe);
3226 return;
3229 sidelineRegionDir(fs, parent);
3230 LOG.info("[" + thread + "] Sidelined parent region dir "+ parent.getHdfsRegionDir() + " into " +
3231 getSidelineDir());
3232 debugLsr(parent.getHdfsRegionDir());
3234 // Make sure we don't have the parents and daughters around
3235 overlap.remove(parent);
3236 overlap.remove(daughterA);
3237 overlap.remove(daughterB);
3239 LOG.info("Done fixing split.");
3243 void mergeOverlaps(Collection<HbckInfo> overlap)
3244 throws IOException {
3245 String thread = Thread.currentThread().getName();
3246 LOG.info("== [" + thread + "] Merging regions into one region: "
3247 + Joiner.on(",").join(overlap));
3248 // get the min / max range and close all concerned regions
3249 Pair<byte[], byte[]> range = null;
3250 for (HbckInfo hi : overlap) {
3251 if (range == null) {
3252 range = new Pair<>(hi.getStartKey(), hi.getEndKey());
3253 } else {
3254 if (RegionSplitCalculator.BYTES_COMPARATOR
3255 .compare(hi.getStartKey(), range.getFirst()) < 0) {
3256 range.setFirst(hi.getStartKey());
3258 if (RegionSplitCalculator.BYTES_COMPARATOR
3259 .compare(hi.getEndKey(), range.getSecond()) > 0) {
3260 range.setSecond(hi.getEndKey());
3263 // need to close files so delete can happen.
3264 LOG.debug("[" + thread + "] Closing region before moving data around: " + hi);
3265 LOG.debug("[" + thread + "] Contained region dir before close");
3266 debugLsr(hi.getHdfsRegionDir());
3267 try {
3268 LOG.info("[" + thread + "] Closing region: " + hi);
3269 closeRegion(hi);
3270 } catch (IOException ioe) {
3271 LOG.warn("[" + thread + "] Was unable to close region " + hi
3272 + ". Just continuing... ", ioe);
3273 } catch (InterruptedException e) {
3274 LOG.warn("[" + thread + "] Was unable to close region " + hi
3275 + ". Just continuing... ", e);
3278 try {
3279 LOG.info("[" + thread + "] Offlining region: " + hi);
3280 offline(hi.getRegionName());
3281 } catch (IOException ioe) {
3282 LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
3283 + ". Just continuing... ", ioe);
3287 // create new empty container region.
3288 TableDescriptor htd = getTableInfo().getHTD();
3289 // from start key to end Key
3290 RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
3291 .setStartKey(range.getFirst())
3292 .setEndKey(range.getSecond())
3293 .build();
3294 HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3295 LOG.info("[" + thread + "] Created new empty container region: " +
3296 newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
3297 debugLsr(region.getRegionFileSystem().getRegionDir());
3299 // all target regions are closed, should be able to safely cleanup.
3300 boolean didFix= false;
3301 Path target = region.getRegionFileSystem().getRegionDir();
3302 for (HbckInfo contained : overlap) {
3303 LOG.info("[" + thread + "] Merging " + contained + " into " + target );
3304 int merges = mergeRegionDirs(target, contained);
3305 if (merges > 0) {
3306 didFix = true;
3309 if (didFix) {
3310 fixes++;
3315 * Sideline some regions in a big overlap group so that it
3316 * will have fewer regions, and it is easier to merge them later on.
3318 * @param bigOverlap the overlapped group with regions more than maxMerge
3319 * @throws IOException
3321 void sidelineBigOverlaps(
3322 Collection<HbckInfo> bigOverlap) throws IOException {
3323 int overlapsToSideline = bigOverlap.size() - maxMerge;
3324 if (overlapsToSideline > maxOverlapsToSideline) {
3325 overlapsToSideline = maxOverlapsToSideline;
3327 List<HbckInfo> regionsToSideline =
3328 RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
3329 FileSystem fs = FileSystem.get(conf);
3330 for (HbckInfo regionToSideline: regionsToSideline) {
3331 try {
3332 LOG.info("Closing region: " + regionToSideline);
3333 closeRegion(regionToSideline);
3334 } catch (IOException ioe) {
3335 LOG.warn("Was unable to close region " + regionToSideline
3336 + ". Just continuing... ", ioe);
3337 } catch (InterruptedException e) {
3338 LOG.warn("Was unable to close region " + regionToSideline
3339 + ". Just continuing... ", e);
3342 try {
3343 LOG.info("Offlining region: " + regionToSideline);
3344 offline(regionToSideline.getRegionName());
3345 } catch (IOException ioe) {
3346 LOG.warn("Unable to offline region from master: " + regionToSideline
3347 + ". Just continuing... ", ioe);
3350 LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
3351 Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
3352 if (sidelineRegionDir != null) {
3353 sidelinedRegions.put(sidelineRegionDir, regionToSideline);
3354 LOG.info("After sidelined big overlapped region: "
3355 + regionToSideline.getRegionNameAsString()
3356 + " to " + sidelineRegionDir.toString());
3357 fixes++;
3364 * Check the region chain (from META) of this table. We are looking for
3365 * holes, overlaps, and cycles.
3366 * @return false if there are errors
3367 * @throws IOException
3369 public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
3370 // When table is disabled no need to check for the region chain. Some of the regions
3371 // accidently if deployed, this below code might report some issues like missing start
3372 // or end regions or region hole in chain and may try to fix which is unwanted.
3373 if (isTableDisabled(this.tableName)) {
3374 return true;
3376 int originalErrorsCount = errors.getErrorList().size();
3377 Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
3378 SortedSet<byte[]> splits = sc.getSplits();
3380 byte[] prevKey = null;
3381 byte[] problemKey = null;
3383 if (splits.isEmpty()) {
3384 // no region for this table
3385 handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
3388 for (byte[] key : splits) {
3389 Collection<HbckInfo> ranges = regions.get(key);
3390 if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
3391 for (HbckInfo rng : ranges) {
3392 handler.handleRegionStartKeyNotEmpty(rng);
3396 // check for degenerate ranges
3397 for (HbckInfo rng : ranges) {
3398 // special endkey case converts '' to null
3399 byte[] endKey = rng.getEndKey();
3400 endKey = (endKey.length == 0) ? null : endKey;
3401 if (Bytes.equals(rng.getStartKey(),endKey)) {
3402 handler.handleDegenerateRegion(rng);
3406 if (ranges.size() == 1) {
3407 // this split key is ok -- no overlap, not a hole.
3408 if (problemKey != null) {
3409 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3411 problemKey = null; // fell through, no more problem.
3412 } else if (ranges.size() > 1) {
3413 // set the new problem key group name, if already have problem key, just
3414 // keep using it.
3415 if (problemKey == null) {
3416 // only for overlap regions.
3417 LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
3418 problemKey = key;
3420 overlapGroups.putAll(problemKey, ranges);
3422 // record errors
3423 ArrayList<HbckInfo> subRange = new ArrayList<>(ranges);
3424 // this dumb and n^2 but this shouldn't happen often
3425 for (HbckInfo r1 : ranges) {
3426 if (r1.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) continue;
3427 subRange.remove(r1);
3428 for (HbckInfo r2 : subRange) {
3429 if (r2.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) continue;
3430 // general case of same start key
3431 if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
3432 handler.handleDuplicateStartKeys(r1,r2);
3433 } else if (Bytes.compareTo(r1.getEndKey(), r2.getStartKey())==0 &&
3434 r1.getHdfsHRI().getRegionId() == r2.getHdfsHRI().getRegionId()) {
3435 LOG.info("this is a split, log to splits");
3436 handler.handleSplit(r1, r2);
3437 } else {
3438 // overlap
3439 handler.handleOverlapInRegionChain(r1, r2);
3444 } else if (ranges.isEmpty()) {
3445 if (problemKey != null) {
3446 LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3448 problemKey = null;
3450 byte[] holeStopKey = sc.getSplits().higher(key);
3451 // if higher key is null we reached the top.
3452 if (holeStopKey != null) {
3453 // hole
3454 handler.handleHoleInRegionChain(key, holeStopKey);
3457 prevKey = key;
3460 // When the last region of a table is proper and having an empty end key, 'prevKey'
3461 // will be null.
3462 if (prevKey != null) {
3463 handler.handleRegionEndKeyNotEmpty(prevKey);
3466 // TODO fold this into the TableIntegrityHandler
3467 if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
3468 boolean ok = handleOverlapsParallel(handler, prevKey);
3469 if (!ok) {
3470 return false;
3472 } else {
3473 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3474 handler.handleOverlapGroup(overlap);
3478 if (details) {
3479 // do full region split map dump
3480 errors.print("---- Table '" + this.tableName
3481 + "': region split map");
3482 dump(splits, regions);
3483 errors.print("---- Table '" + this.tableName
3484 + "': overlap groups");
3485 dumpOverlapProblems(overlapGroups);
3486 errors.print("There are " + overlapGroups.keySet().size()
3487 + " overlap groups with " + overlapGroups.size()
3488 + " overlapping regions");
3490 if (!sidelinedRegions.isEmpty()) {
3491 LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3492 errors.print("---- Table '" + this.tableName
3493 + "': sidelined big overlapped regions");
3494 dumpSidelinedRegions(sidelinedRegions);
3496 return errors.getErrorList().size() == originalErrorsCount;
3499 private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3500 throws IOException {
3501 // we parallelize overlap handler for the case we have lots of groups to fix. We can
3502 // safely assume each group is independent.
3503 List<WorkItemOverlapMerge> merges = new ArrayList<>(overlapGroups.size());
3504 List<Future<Void>> rets;
3505 for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3507 merges.add(new WorkItemOverlapMerge(overlap, handler));
3509 try {
3510 rets = executor.invokeAll(merges);
3511 } catch (InterruptedException e) {
3512 LOG.error("Overlap merges were interrupted", e);
3513 return false;
3515 for(int i=0; i<merges.size(); i++) {
3516 WorkItemOverlapMerge work = merges.get(i);
3517 Future<Void> f = rets.get(i);
3518 try {
3519 f.get();
3520 } catch(ExecutionException e) {
3521 LOG.warn("Failed to merge overlap group" + work, e.getCause());
3522 } catch (InterruptedException e) {
3523 LOG.error("Waiting for overlap merges was interrupted", e);
3524 return false;
3527 return true;
3531 * This dumps data in a visually reasonable way for visual debugging
3533 * @param splits
3534 * @param regions
3536 void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3537 // we display this way because the last end key should be displayed as well.
3538 StringBuilder sb = new StringBuilder();
3539 for (byte[] k : splits) {
3540 sb.setLength(0); // clear out existing buffer, if any.
3541 sb.append(Bytes.toStringBinary(k) + ":\t");
3542 for (HbckInfo r : regions.get(k)) {
3543 sb.append("[ "+ r.toString() + ", "
3544 + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3546 errors.print(sb.toString());
3551 public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3552 // we display this way because the last end key should be displayed as
3553 // well.
3554 for (byte[] k : regions.keySet()) {
3555 errors.print(Bytes.toStringBinary(k) + ":");
3556 for (HbckInfo r : regions.get(k)) {
3557 errors.print("[ " + r.toString() + ", "
3558 + Bytes.toStringBinary(r.getEndKey()) + "]");
3560 errors.print("----");
3564 public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3565 for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3566 TableName tableName = entry.getValue().getTableName();
3567 Path path = entry.getKey();
3568 errors.print("This sidelined region dir should be bulk loaded: "
3569 + path.toString());
3570 errors.print("Bulk load command looks like: "
3571 + "hbase org.apache.hadoop.hbase.tool.LoadIncrementalHFiles "
3572 + path.toUri().getPath() + " "+ tableName);
3576 public Multimap<byte[], HbckInfo> getOverlapGroups(
3577 TableName table) {
3578 TableInfo ti = tablesInfo.get(table);
3579 return ti.overlapGroups;
3583 * Return a list of user-space table names whose metadata have not been
3584 * modified in the last few milliseconds specified by timelag
3585 * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3586 * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
3587 * milliseconds specified by timelag, then the table is a candidate to be returned.
3588 * @return tables that have not been modified recently
3589 * @throws IOException if an error is encountered
3591 TableDescriptor[] getTables(AtomicInteger numSkipped) {
3592 List<TableName> tableNames = new ArrayList<>();
3593 long now = EnvironmentEdgeManager.currentTime();
3595 for (HbckInfo hbi : regionInfoMap.values()) {
3596 MetaEntry info = hbi.metaEntry;
3598 // if the start key is zero, then we have found the first region of a table.
3599 // pick only those tables that were not modified in the last few milliseconds.
3600 if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3601 if (info.modTime + timelag < now) {
3602 tableNames.add(info.getTable());
3603 } else {
3604 numSkipped.incrementAndGet(); // one more in-flux table
3608 return getTableDescriptors(tableNames);
3611 TableDescriptor[] getTableDescriptors(List<TableName> tableNames) {
3612 LOG.info("getTableDescriptors == tableNames => " + tableNames);
3613 try (Connection conn = ConnectionFactory.createConnection(getConf());
3614 Admin admin = conn.getAdmin()) {
3615 List<TableDescriptor> tds = admin.listTableDescriptors(tableNames);
3616 return tds.toArray(new TableDescriptor[tds.size()]);
3617 } catch (IOException e) {
3618 LOG.debug("Exception getting table descriptors", e);
3620 return new TableDescriptor[0];
3624 * Gets the entry in regionInfo corresponding to the the given encoded
3625 * region name. If the region has not been seen yet, a new entry is added
3626 * and returned.
3628 private synchronized HbckInfo getOrCreateInfo(String name) {
3629 HbckInfo hbi = regionInfoMap.get(name);
3630 if (hbi == null) {
3631 hbi = new HbckInfo(null);
3632 regionInfoMap.put(name, hbi);
3634 return hbi;
3637 private void checkAndFixReplication() throws ReplicationException {
3638 ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, errors);
3639 checker.checkUnDeletedQueues();
3641 if (checker.hasUnDeletedQueues() && this.fixReplication) {
3642 checker.fixUnDeletedQueues();
3643 setShouldRerun();
3648 * Check values in regionInfo for hbase:meta
3649 * Check if zero or more than one regions with hbase:meta are found.
3650 * If there are inconsistencies (i.e. zero or more than one regions
3651 * pretend to be holding the hbase:meta) try to fix that and report an error.
3652 * @throws IOException from HBaseFsckRepair functions
3653 * @throws KeeperException
3654 * @throws InterruptedException
3656 boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3657 Map<Integer, HbckInfo> metaRegions = new HashMap<>();
3658 for (HbckInfo value : regionInfoMap.values()) {
3659 if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3660 metaRegions.put(value.getReplicaId(), value);
3663 int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
3664 .getRegionReplication();
3665 boolean noProblem = true;
3666 // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas
3667 // Check the deployed servers. It should be exactly one server for each replica.
3668 for (int i = 0; i < metaReplication; i++) {
3669 HbckInfo metaHbckInfo = metaRegions.remove(i);
3670 List<ServerName> servers = new ArrayList<>();
3671 if (metaHbckInfo != null) {
3672 servers = metaHbckInfo.deployedOn;
3674 if (servers.size() != 1) {
3675 noProblem = false;
3676 if (servers.isEmpty()) {
3677 assignMetaReplica(i);
3678 } else if (servers.size() > 1) {
3679 errors
3680 .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
3681 metaHbckInfo.getReplicaId() + " is found on more than one region.");
3682 if (shouldFixAssignments()) {
3683 errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3684 metaHbckInfo.getReplicaId() +"..");
3685 setShouldRerun();
3686 // try fix it (treat is a dupe assignment)
3687 HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3692 // unassign whatever is remaining in metaRegions. They are excess replicas.
3693 for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3694 noProblem = false;
3695 errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3696 "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3697 ", deployed " + metaRegions.size());
3698 if (shouldFixAssignments()) {
3699 errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3700 " of hbase:meta..");
3701 setShouldRerun();
3702 unassignMetaReplica(entry.getValue());
3705 // if noProblem is false, rerun hbck with hopefully fixed META
3706 // if noProblem is true, no errors, so continue normally
3707 return noProblem;
3710 private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3711 KeeperException {
3712 undeployRegions(hi);
3713 ZKUtil.deleteNode(zkw, zkw.getZNodePaths().getZNodeForReplica(hi.metaEntry.getReplicaId()));
3716 private void assignMetaReplica(int replicaId)
3717 throws IOException, KeeperException, InterruptedException {
3718 errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3719 replicaId +" is not found on any region.");
3720 if (shouldFixAssignments()) {
3721 errors.print("Trying to fix a problem with hbase:meta..");
3722 setShouldRerun();
3723 // try to fix it (treat it as unassigned region)
3724 RegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3725 RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
3726 HBaseFsckRepair.fixUnassigned(admin, h);
3727 HBaseFsckRepair.waitUntilAssigned(admin, h);
3732 * Scan hbase:meta, adding all regions found to the regionInfo map.
3733 * @throws IOException if an error is encountered
3735 boolean loadMetaEntries() throws IOException {
3736 MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
3737 int countRecord = 1;
3739 // comparator to sort KeyValues with latest modtime
3740 final Comparator<Cell> comp = new Comparator<Cell>() {
3741 @Override
3742 public int compare(Cell k1, Cell k2) {
3743 return Long.compare(k1.getTimestamp(), k2.getTimestamp());
3747 @Override
3748 public boolean visit(Result result) throws IOException {
3749 try {
3751 // record the latest modification of this META record
3752 long ts = Collections.max(result.listCells(), comp).getTimestamp();
3753 RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3754 if (rl == null) {
3755 emptyRegionInfoQualifiers.add(result);
3756 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3757 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3758 return true;
3760 ServerName sn = null;
3761 if (rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null ||
3762 rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
3763 emptyRegionInfoQualifiers.add(result);
3764 errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3765 "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3766 return true;
3768 RegionInfo hri = rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
3769 if (!(isTableIncluded(hri.getTable())
3770 || hri.isMetaRegion())) {
3771 return true;
3773 PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(result);
3774 for (HRegionLocation h : rl.getRegionLocations()) {
3775 if (h == null || h.getRegionInfo() == null) {
3776 continue;
3778 sn = h.getServerName();
3779 hri = h.getRegionInfo();
3781 MetaEntry m = null;
3782 if (hri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
3783 m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3784 } else {
3785 m = new MetaEntry(hri, sn, ts, null, null);
3787 HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3788 if (previous == null) {
3789 regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3790 } else if (previous.metaEntry == null) {
3791 previous.metaEntry = m;
3792 } else {
3793 throw new IOException("Two entries in hbase:meta are same " + previous);
3796 PairOfSameType<RegionInfo> mergeRegions = MetaTableAccessor.getMergeRegions(result);
3797 for (RegionInfo mergeRegion : new RegionInfo[] {
3798 mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3799 if (mergeRegion != null) {
3800 // This region is already been merged
3801 HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3802 hbInfo.setMerged(true);
3806 // show proof of progress to the user, once for every 100 records.
3807 if (countRecord % 100 == 0) {
3808 errors.progress();
3810 countRecord++;
3811 return true;
3812 } catch (RuntimeException e) {
3813 LOG.error("Result=" + result);
3814 throw e;
3818 if (!checkMetaOnly) {
3819 // Scan hbase:meta to pick up user regions
3820 MetaTableAccessor.fullScanRegions(connection, visitor);
3823 errors.print("");
3824 return true;
3828 * Stores the regioninfo entries scanned from META
3830 static class MetaEntry extends HRegionInfo {
3831 ServerName regionServer; // server hosting this region
3832 long modTime; // timestamp of most recent modification metadata
3833 RegionInfo splitA, splitB; //split daughters
3835 public MetaEntry(RegionInfo rinfo, ServerName regionServer, long modTime) {
3836 this(rinfo, regionServer, modTime, null, null);
3839 public MetaEntry(RegionInfo rinfo, ServerName regionServer, long modTime,
3840 RegionInfo splitA, RegionInfo splitB) {
3841 super(rinfo);
3842 this.regionServer = regionServer;
3843 this.modTime = modTime;
3844 this.splitA = splitA;
3845 this.splitB = splitB;
3848 @Override
3849 public boolean equals(Object o) {
3850 boolean superEq = super.equals(o);
3851 if (!superEq) {
3852 return superEq;
3855 MetaEntry me = (MetaEntry) o;
3856 if (!regionServer.equals(me.regionServer)) {
3857 return false;
3859 return (modTime == me.modTime);
3862 @Override
3863 public int hashCode() {
3864 int hash = Arrays.hashCode(getRegionName());
3865 hash = (int) (hash ^ getRegionId());
3866 hash ^= Arrays.hashCode(getStartKey());
3867 hash ^= Arrays.hashCode(getEndKey());
3868 hash ^= Boolean.valueOf(isOffline()).hashCode();
3869 hash ^= getTable().hashCode();
3870 if (regionServer != null) {
3871 hash ^= regionServer.hashCode();
3873 hash = (int) (hash ^ modTime);
3874 return hash;
3879 * Stores the regioninfo entries from HDFS
3881 static class HdfsEntry {
3882 RegionInfo hri;
3883 Path hdfsRegionDir = null;
3884 long hdfsRegionDirModTime = 0;
3885 boolean hdfsRegioninfoFilePresent = false;
3886 boolean hdfsOnlyEdits = false;
3890 * Stores the regioninfo retrieved from Online region servers.
3892 static class OnlineEntry {
3893 RegionInfo hri;
3894 ServerName hsa;
3896 @Override
3897 public String toString() {
3898 return hsa.toString() + ";" + hri.getRegionNameAsString();
3903 * Maintain information about a particular region. It gathers information
3904 * from three places -- HDFS, META, and region servers.
3906 public static class HbckInfo implements KeyRange {
3907 private MetaEntry metaEntry = null; // info in META
3908 private HdfsEntry hdfsEntry = null; // info in HDFS
3909 private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3910 private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3911 private boolean skipChecks = false; // whether to skip further checks to this region info.
3912 private boolean isMerged = false;// whether this region has already been merged into another one
3913 private int deployedReplicaId = RegionInfo.DEFAULT_REPLICA_ID;
3914 private RegionInfo primaryHRIForDeployedReplica = null;
3916 HbckInfo(MetaEntry metaEntry) {
3917 this.metaEntry = metaEntry;
3920 public synchronized int getReplicaId() {
3921 return metaEntry != null? metaEntry.getReplicaId(): deployedReplicaId;
3924 public synchronized void addServer(RegionInfo hri, ServerName server) {
3925 OnlineEntry rse = new OnlineEntry() ;
3926 rse.hri = hri;
3927 rse.hsa = server;
3928 this.deployedEntries.add(rse);
3929 this.deployedOn.add(server);
3930 // save the replicaId that we see deployed in the cluster
3931 this.deployedReplicaId = hri.getReplicaId();
3932 this.primaryHRIForDeployedReplica =
3933 RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3936 @Override
3937 public synchronized String toString() {
3938 StringBuilder sb = new StringBuilder();
3939 sb.append("{ meta => ");
3940 sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3941 sb.append( ", hdfs => " + getHdfsRegionDir());
3942 sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3943 sb.append( ", replicaId => " + getReplicaId());
3944 sb.append(" }");
3945 return sb.toString();
3948 @Override
3949 public byte[] getStartKey() {
3950 if (this.metaEntry != null) {
3951 return this.metaEntry.getStartKey();
3952 } else if (this.hdfsEntry != null) {
3953 return this.hdfsEntry.hri.getStartKey();
3954 } else {
3955 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3956 return null;
3960 @Override
3961 public byte[] getEndKey() {
3962 if (this.metaEntry != null) {
3963 return this.metaEntry.getEndKey();
3964 } else if (this.hdfsEntry != null) {
3965 return this.hdfsEntry.hri.getEndKey();
3966 } else {
3967 LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3968 return null;
3972 public TableName getTableName() {
3973 if (this.metaEntry != null) {
3974 return this.metaEntry.getTable();
3975 } else if (this.hdfsEntry != null) {
3976 // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3977 // so we get the name from the Path
3978 Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3979 return FSUtils.getTableName(tableDir);
3980 } else {
3981 // return the info from the first online/deployed hri
3982 for (OnlineEntry e : deployedEntries) {
3983 return e.hri.getTable();
3985 return null;
3989 public String getRegionNameAsString() {
3990 if (metaEntry != null) {
3991 return metaEntry.getRegionNameAsString();
3992 } else if (hdfsEntry != null) {
3993 if (hdfsEntry.hri != null) {
3994 return hdfsEntry.hri.getRegionNameAsString();
3996 } else {
3997 // return the info from the first online/deployed hri
3998 for (OnlineEntry e : deployedEntries) {
3999 return e.hri.getRegionNameAsString();
4002 return null;
4005 public byte[] getRegionName() {
4006 if (metaEntry != null) {
4007 return metaEntry.getRegionName();
4008 } else if (hdfsEntry != null) {
4009 return hdfsEntry.hri.getRegionName();
4010 } else {
4011 // return the info from the first online/deployed hri
4012 for (OnlineEntry e : deployedEntries) {
4013 return e.hri.getRegionName();
4015 return null;
4019 public RegionInfo getPrimaryHRIForDeployedReplica() {
4020 return primaryHRIForDeployedReplica;
4023 Path getHdfsRegionDir() {
4024 if (hdfsEntry == null) {
4025 return null;
4027 return hdfsEntry.hdfsRegionDir;
4030 boolean containsOnlyHdfsEdits() {
4031 if (hdfsEntry == null) {
4032 return false;
4034 return hdfsEntry.hdfsOnlyEdits;
4037 boolean isHdfsRegioninfoPresent() {
4038 if (hdfsEntry == null) {
4039 return false;
4041 return hdfsEntry.hdfsRegioninfoFilePresent;
4044 long getModTime() {
4045 if (hdfsEntry == null) {
4046 return 0;
4048 return hdfsEntry.hdfsRegionDirModTime;
4051 RegionInfo getHdfsHRI() {
4052 if (hdfsEntry == null) {
4053 return null;
4055 return hdfsEntry.hri;
4058 public void setSkipChecks(boolean skipChecks) {
4059 this.skipChecks = skipChecks;
4062 public boolean isSkipChecks() {
4063 return skipChecks;
4066 public void setMerged(boolean isMerged) {
4067 this.isMerged = isMerged;
4070 public boolean isMerged() {
4071 return this.isMerged;
4075 final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
4076 @Override
4077 public int compare(HbckInfo l, HbckInfo r) {
4078 if (l == r) {
4079 // same instance
4080 return 0;
4083 int tableCompare = l.getTableName().compareTo(r.getTableName());
4084 if (tableCompare != 0) {
4085 return tableCompare;
4088 int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
4089 l.getStartKey(), r.getStartKey());
4090 if (startComparison != 0) {
4091 return startComparison;
4094 // Special case for absolute endkey
4095 byte[] endKey = r.getEndKey();
4096 endKey = (endKey.length == 0) ? null : endKey;
4097 byte[] endKey2 = l.getEndKey();
4098 endKey2 = (endKey2.length == 0) ? null : endKey2;
4099 int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
4100 endKey2, endKey);
4102 if (endComparison != 0) {
4103 return endComparison;
4106 // use regionId as tiebreaker.
4107 // Null is considered after all possible values so make it bigger.
4108 if (l.hdfsEntry == null && r.hdfsEntry == null) {
4109 return 0;
4111 if (l.hdfsEntry == null && r.hdfsEntry != null) {
4112 return 1;
4114 // l.hdfsEntry must not be null
4115 if (r.hdfsEntry == null) {
4116 return -1;
4118 // both l.hdfsEntry and r.hdfsEntry must not be null.
4119 return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
4124 * Prints summary of all tables found on the system.
4126 private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
4127 StringBuilder sb = new StringBuilder();
4128 int numOfSkippedRegions;
4129 errors.print("Summary:");
4130 for (TableInfo tInfo : tablesInfo.values()) {
4131 numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
4132 skippedRegions.get(tInfo.getName()).size() : 0;
4134 if (errors.tableHasErrors(tInfo)) {
4135 errors.print("Table " + tInfo.getName() + " is inconsistent.");
4136 } else if (numOfSkippedRegions > 0){
4137 errors.print("Table " + tInfo.getName() + " is okay (with "
4138 + numOfSkippedRegions + " skipped regions).");
4140 else {
4141 errors.print("Table " + tInfo.getName() + " is okay.");
4143 errors.print(" Number of regions: " + tInfo.getNumRegions());
4144 if (numOfSkippedRegions > 0) {
4145 Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
4146 System.out.println(" Number of skipped regions: " + numOfSkippedRegions);
4147 System.out.println(" List of skipped regions:");
4148 for(String sr : skippedRegionStrings) {
4149 System.out.println(" " + sr);
4152 sb.setLength(0); // clear out existing buffer, if any.
4153 sb.append(" Deployed on: ");
4154 for (ServerName server : tInfo.deployedOn) {
4155 sb.append(" " + server.toString());
4157 errors.print(sb.toString());
4161 static ErrorReporter getErrorReporter(
4162 final Configuration conf) throws ClassNotFoundException {
4163 Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
4164 return ReflectionUtils.newInstance(reporter, conf);
4167 public interface ErrorReporter {
4168 enum ERROR_CODE {
4169 UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
4170 NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
4171 NOT_DEPLOYED, MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
4172 FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
4173 HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
4174 ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
4175 LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR,
4176 ORPHAN_TABLE_STATE, NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE, DUPE_ENDKEYS,
4177 UNSUPPORTED_OPTION, INVALID_TABLE
4179 void clear();
4180 void report(String message);
4181 void reportError(String message);
4182 void reportError(ERROR_CODE errorCode, String message);
4183 void reportError(ERROR_CODE errorCode, String message, TableInfo table);
4184 void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
4185 void reportError(
4186 ERROR_CODE errorCode,
4187 String message,
4188 TableInfo table,
4189 HbckInfo info1,
4190 HbckInfo info2
4192 int summarize();
4193 void detail(String details);
4194 ArrayList<ERROR_CODE> getErrorList();
4195 void progress();
4196 void print(String message);
4197 void resetErrors();
4198 boolean tableHasErrors(TableInfo table);
4201 static class PrintingErrorReporter implements ErrorReporter {
4202 public int errorCount = 0;
4203 private int showProgress;
4204 // How frequently calls to progress() will create output
4205 private static final int progressThreshold = 100;
4207 Set<TableInfo> errorTables = new HashSet<>();
4209 // for use by unit tests to verify which errors were discovered
4210 private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
4212 @Override
4213 public void clear() {
4214 errorTables.clear();
4215 errorList.clear();
4216 errorCount = 0;
4219 @Override
4220 public synchronized void reportError(ERROR_CODE errorCode, String message) {
4221 if (errorCode == ERROR_CODE.WRONG_USAGE) {
4222 System.err.println(message);
4223 return;
4226 errorList.add(errorCode);
4227 if (!summary) {
4228 System.out.println("ERROR: " + message);
4230 errorCount++;
4231 showProgress = 0;
4234 @Override
4235 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
4236 errorTables.add(table);
4237 reportError(errorCode, message);
4240 @Override
4241 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4242 HbckInfo info) {
4243 errorTables.add(table);
4244 String reference = "(region " + info.getRegionNameAsString() + ")";
4245 reportError(errorCode, reference + " " + message);
4248 @Override
4249 public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4250 HbckInfo info1, HbckInfo info2) {
4251 errorTables.add(table);
4252 String reference = "(regions " + info1.getRegionNameAsString()
4253 + " and " + info2.getRegionNameAsString() + ")";
4254 reportError(errorCode, reference + " " + message);
4257 @Override
4258 public synchronized void reportError(String message) {
4259 reportError(ERROR_CODE.UNKNOWN, message);
4263 * Report error information, but do not increment the error count. Intended for cases
4264 * where the actual error would have been reported previously.
4265 * @param message
4267 @Override
4268 public synchronized void report(String message) {
4269 if (! summary) {
4270 System.out.println("ERROR: " + message);
4272 showProgress = 0;
4275 @Override
4276 public synchronized int summarize() {
4277 System.out.println(Integer.toString(errorCount) +
4278 " inconsistencies detected.");
4279 if (errorCount == 0) {
4280 System.out.println("Status: OK");
4281 return 0;
4282 } else {
4283 System.out.println("Status: INCONSISTENT");
4284 return -1;
4288 @Override
4289 public ArrayList<ERROR_CODE> getErrorList() {
4290 return errorList;
4293 @Override
4294 public synchronized void print(String message) {
4295 if (!summary) {
4296 System.out.println(message);
4300 @Override
4301 public boolean tableHasErrors(TableInfo table) {
4302 return errorTables.contains(table);
4305 @Override
4306 public void resetErrors() {
4307 errorCount = 0;
4310 @Override
4311 public synchronized void detail(String message) {
4312 if (details) {
4313 System.out.println(message);
4315 showProgress = 0;
4318 @Override
4319 public synchronized void progress() {
4320 if (showProgress++ == progressThreshold) {
4321 if (!summary) {
4322 System.out.print(".");
4324 showProgress = 0;
4330 * Contact a region server and get all information from it
4332 static class WorkItemRegion implements Callable<Void> {
4333 private final HBaseFsck hbck;
4334 private final ServerName rsinfo;
4335 private final ErrorReporter errors;
4336 private final ClusterConnection connection;
4338 WorkItemRegion(HBaseFsck hbck, ServerName info,
4339 ErrorReporter errors, ClusterConnection connection) {
4340 this.hbck = hbck;
4341 this.rsinfo = info;
4342 this.errors = errors;
4343 this.connection = connection;
4346 @Override
4347 public synchronized Void call() throws IOException {
4348 errors.progress();
4349 try {
4350 BlockingInterface server = connection.getAdmin(rsinfo);
4352 // list all online regions from this region server
4353 List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
4354 regions = filterRegions(regions);
4356 if (details) {
4357 errors.detail("RegionServer: " + rsinfo.getServerName() +
4358 " number of regions: " + regions.size());
4359 for (RegionInfo rinfo: regions) {
4360 errors.detail(" " + rinfo.getRegionNameAsString() +
4361 " id: " + rinfo.getRegionId() +
4362 " encoded_name: " + rinfo.getEncodedName() +
4363 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
4364 " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
4368 // check to see if the existence of this region matches the region in META
4369 for (RegionInfo r:regions) {
4370 HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
4371 hbi.addServer(r, rsinfo);
4373 } catch (IOException e) { // unable to connect to the region server.
4374 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
4375 " Unable to fetch region information. " + e);
4376 throw e;
4378 return null;
4381 private List<RegionInfo> filterRegions(List<RegionInfo> regions) {
4382 List<RegionInfo> ret = Lists.newArrayList();
4383 for (RegionInfo hri : regions) {
4384 if (hri.isMetaRegion() || (!hbck.checkMetaOnly
4385 && hbck.isTableIncluded(hri.getTable()))) {
4386 ret.add(hri);
4389 return ret;
4394 * Contact hdfs and get all information about specified table directory into
4395 * regioninfo list.
4397 class WorkItemHdfsDir implements Callable<Void> {
4398 private FileStatus tableDir;
4399 private ErrorReporter errors;
4400 private FileSystem fs;
4402 WorkItemHdfsDir(FileSystem fs, ErrorReporter errors,
4403 FileStatus status) {
4404 this.fs = fs;
4405 this.tableDir = status;
4406 this.errors = errors;
4409 @Override
4410 public synchronized Void call() throws InterruptedException, ExecutionException {
4411 final Vector<Exception> exceptions = new Vector<>();
4413 try {
4414 final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
4415 final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
4417 for (final FileStatus regionDir : regionDirs) {
4418 errors.progress();
4419 final String encodedName = regionDir.getPath().getName();
4420 // ignore directories that aren't hexadecimal
4421 if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
4422 continue;
4425 if (!exceptions.isEmpty()) {
4426 break;
4429 futures.add(executor.submit(new Runnable() {
4430 @Override
4431 public void run() {
4432 try {
4433 LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
4435 Path regioninfoFile = new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
4436 boolean regioninfoFileExists = fs.exists(regioninfoFile);
4438 if (!regioninfoFileExists) {
4439 // As tables become larger it is more and more likely that by the time you
4440 // reach a given region that it will be gone due to region splits/merges.
4441 if (!fs.exists(regionDir.getPath())) {
4442 LOG.warn("By the time we tried to process this region dir it was already gone: "
4443 + regionDir.getPath());
4444 return;
4448 HbckInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
4449 HdfsEntry he = new HdfsEntry();
4450 synchronized (hbi) {
4451 if (hbi.getHdfsRegionDir() != null) {
4452 errors.print("Directory " + encodedName + " duplicate??" +
4453 hbi.getHdfsRegionDir());
4456 he.hdfsRegionDir = regionDir.getPath();
4457 he.hdfsRegionDirModTime = regionDir.getModificationTime();
4458 he.hdfsRegioninfoFilePresent = regioninfoFileExists;
4459 // we add to orphan list when we attempt to read .regioninfo
4461 // Set a flag if this region contains only edits
4462 // This is special case if a region is left after split
4463 he.hdfsOnlyEdits = true;
4464 FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4465 Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
4466 for (FileStatus subDir : subDirs) {
4467 errors.progress();
4468 String sdName = subDir.getPath().getName();
4469 if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4470 he.hdfsOnlyEdits = false;
4471 break;
4474 hbi.hdfsEntry = he;
4476 } catch (Exception e) {
4477 LOG.error("Could not load region dir", e);
4478 exceptions.add(e);
4481 }));
4484 // Ensure all pending tasks are complete (or that we run into an exception)
4485 for (Future<?> f : futures) {
4486 if (!exceptions.isEmpty()) {
4487 break;
4489 try {
4490 f.get();
4491 } catch (ExecutionException e) {
4492 LOG.error("Unexpected exec exception! Should've been caught already. (Bug?)", e);
4493 // Shouldn't happen, we already logged/caught any exceptions in the Runnable
4496 } catch (IOException e) {
4497 LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
4498 exceptions.add(e);
4499 } finally {
4500 if (!exceptions.isEmpty()) {
4501 errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4502 + tableDir.getPath().getName()
4503 + " Unable to fetch all HDFS region information. ");
4504 // Just throw the first exception as an indication something bad happened
4505 // Don't need to propagate all the exceptions, we already logged them all anyway
4506 throw new ExecutionException("First exception in WorkItemHdfsDir", exceptions.firstElement());
4509 return null;
4514 * Contact hdfs and get all information about specified table directory into
4515 * regioninfo list.
4517 static class WorkItemHdfsRegionInfo implements Callable<Void> {
4518 private HbckInfo hbi;
4519 private HBaseFsck hbck;
4520 private ErrorReporter errors;
4522 WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4523 this.hbi = hbi;
4524 this.hbck = hbck;
4525 this.errors = errors;
4528 @Override
4529 public synchronized Void call() throws IOException {
4530 // only load entries that haven't been loaded yet.
4531 if (hbi.getHdfsHRI() == null) {
4532 try {
4533 errors.progress();
4534 hbck.loadHdfsRegioninfo(hbi);
4535 } catch (IOException ioe) {
4536 String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4537 + hbi.getTableName() + " in hdfs dir "
4538 + hbi.getHdfsRegionDir()
4539 + "! It may be an invalid format or version file. Treating as "
4540 + "an orphaned regiondir.";
4541 errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4542 try {
4543 hbck.debugLsr(hbi.getHdfsRegionDir());
4544 } catch (IOException ioe2) {
4545 LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4546 throw ioe2;
4548 hbck.orphanHdfsDirs.add(hbi);
4549 throw ioe;
4552 return null;
4557 * Display the full report from fsck. This displays all live and dead region
4558 * servers, and all known regions.
4560 public static void setDisplayFullReport() {
4561 details = true;
4565 * Set exclusive mode.
4567 public static void setForceExclusive() {
4568 forceExclusive = true;
4572 * Only one instance of hbck can modify HBase at a time.
4574 public boolean isExclusive() {
4575 return fixAny || forceExclusive;
4579 * Set summary mode.
4580 * Print only summary of the tables and status (OK or INCONSISTENT)
4582 static void setSummary() {
4583 summary = true;
4587 * Set hbase:meta check mode.
4588 * Print only info about hbase:meta table deployment/state
4590 void setCheckMetaOnly() {
4591 checkMetaOnly = true;
4595 * Set region boundaries check mode.
4597 void setRegionBoundariesCheck() {
4598 checkRegionBoundaries = true;
4602 * Set replication fix mode.
4604 public void setFixReplication(boolean shouldFix) {
4605 fixReplication = shouldFix;
4606 fixAny |= shouldFix;
4609 public void setCleanReplicationBarrier(boolean shouldClean) {
4610 cleanReplicationBarrier = shouldClean;
4614 * Check if we should rerun fsck again. This checks if we've tried to
4615 * fix something and we should rerun fsck tool again.
4616 * Display the full report from fsck. This displays all live and dead
4617 * region servers, and all known regions.
4619 void setShouldRerun() {
4620 rerun = true;
4623 public boolean shouldRerun() {
4624 return rerun;
4628 * Fix inconsistencies found by fsck. This should try to fix errors (if any)
4629 * found by fsck utility.
4631 public void setFixAssignments(boolean shouldFix) {
4632 fixAssignments = shouldFix;
4633 fixAny |= shouldFix;
4636 boolean shouldFixAssignments() {
4637 return fixAssignments;
4640 public void setFixMeta(boolean shouldFix) {
4641 fixMeta = shouldFix;
4642 fixAny |= shouldFix;
4645 boolean shouldFixMeta() {
4646 return fixMeta;
4649 public void setFixEmptyMetaCells(boolean shouldFix) {
4650 fixEmptyMetaCells = shouldFix;
4651 fixAny |= shouldFix;
4654 boolean shouldFixEmptyMetaCells() {
4655 return fixEmptyMetaCells;
4658 public void setCheckHdfs(boolean checking) {
4659 checkHdfs = checking;
4662 boolean shouldCheckHdfs() {
4663 return checkHdfs;
4666 public void setFixHdfsHoles(boolean shouldFix) {
4667 fixHdfsHoles = shouldFix;
4668 fixAny |= shouldFix;
4671 boolean shouldFixHdfsHoles() {
4672 return fixHdfsHoles;
4675 public void setFixTableOrphans(boolean shouldFix) {
4676 fixTableOrphans = shouldFix;
4677 fixAny |= shouldFix;
4680 boolean shouldFixTableOrphans() {
4681 return fixTableOrphans;
4684 public void setFixHdfsOverlaps(boolean shouldFix) {
4685 fixHdfsOverlaps = shouldFix;
4686 fixAny |= shouldFix;
4689 boolean shouldFixHdfsOverlaps() {
4690 return fixHdfsOverlaps;
4693 public void setFixHdfsOrphans(boolean shouldFix) {
4694 fixHdfsOrphans = shouldFix;
4695 fixAny |= shouldFix;
4698 boolean shouldFixHdfsOrphans() {
4699 return fixHdfsOrphans;
4702 public void setFixVersionFile(boolean shouldFix) {
4703 fixVersionFile = shouldFix;
4704 fixAny |= shouldFix;
4707 public boolean shouldFixVersionFile() {
4708 return fixVersionFile;
4711 public void setSidelineBigOverlaps(boolean sbo) {
4712 this.sidelineBigOverlaps = sbo;
4715 public boolean shouldSidelineBigOverlaps() {
4716 return sidelineBigOverlaps;
4719 public void setFixSplitParents(boolean shouldFix) {
4720 fixSplitParents = shouldFix;
4721 fixAny |= shouldFix;
4724 public void setRemoveParents(boolean shouldFix) {
4725 removeParents = shouldFix;
4726 fixAny |= shouldFix;
4729 boolean shouldFixSplitParents() {
4730 return fixSplitParents;
4733 boolean shouldRemoveParents() {
4734 return removeParents;
4737 public void setFixReferenceFiles(boolean shouldFix) {
4738 fixReferenceFiles = shouldFix;
4739 fixAny |= shouldFix;
4742 boolean shouldFixReferenceFiles() {
4743 return fixReferenceFiles;
4746 public void setFixHFileLinks(boolean shouldFix) {
4747 fixHFileLinks = shouldFix;
4748 fixAny |= shouldFix;
4751 boolean shouldFixHFileLinks() {
4752 return fixHFileLinks;
4755 public boolean shouldIgnorePreCheckPermission() {
4756 return !fixAny || ignorePreCheckPermission;
4759 public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4760 this.ignorePreCheckPermission = ignorePreCheckPermission;
4764 * @param mm maximum number of regions to merge into a single region.
4766 public void setMaxMerge(int mm) {
4767 this.maxMerge = mm;
4770 public int getMaxMerge() {
4771 return maxMerge;
4774 public void setMaxOverlapsToSideline(int mo) {
4775 this.maxOverlapsToSideline = mo;
4778 public int getMaxOverlapsToSideline() {
4779 return maxOverlapsToSideline;
4783 * Only check/fix tables specified by the list,
4784 * Empty list means all tables are included.
4786 boolean isTableIncluded(TableName table) {
4787 return (tablesIncluded.isEmpty()) || tablesIncluded.contains(table);
4790 public void includeTable(TableName table) {
4791 tablesIncluded.add(table);
4794 Set<TableName> getIncludedTables() {
4795 return new HashSet<>(tablesIncluded);
4799 * We are interested in only those tables that have not changed their state in
4800 * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag
4801 * @param seconds - the time in seconds
4803 public void setTimeLag(long seconds) {
4804 timelag = seconds * 1000; // convert to milliseconds
4809 * @param sidelineDir - HDFS path to sideline data
4811 public void setSidelineDir(String sidelineDir) {
4812 this.sidelineDir = new Path(sidelineDir);
4815 protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4816 return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4819 public HFileCorruptionChecker getHFilecorruptionChecker() {
4820 return hfcc;
4823 public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4824 this.hfcc = hfcc;
4827 public void setRetCode(int code) {
4828 this.retcode = code;
4831 public int getRetCode() {
4832 return retcode;
4835 protected HBaseFsck printUsageAndExit() {
4836 StringWriter sw = new StringWriter(2048);
4837 PrintWriter out = new PrintWriter(sw);
4838 out.println("");
4839 out.println("-----------------------------------------------------------------------");
4840 out.println("NOTE: As of HBase version 2.0, the hbck tool is significantly changed.");
4841 out.println("In general, all Read-Only options are supported and can be be used");
4842 out.println("safely. Most -fix/ -repair options are NOT supported. Please see usage");
4843 out.println("below for details on which options are not supported.");
4844 out.println("-----------------------------------------------------------------------");
4845 out.println("");
4846 out.println("Usage: fsck [opts] {only tables}");
4847 out.println(" where [opts] are:");
4848 out.println(" -help Display help options (this)");
4849 out.println(" -details Display full report of all regions.");
4850 out.println(" -timelag <timeInSeconds> Process only regions that " +
4851 " have not experienced any metadata updates in the last " +
4852 " <timeInSeconds> seconds.");
4853 out.println(" -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4854 " before checking if the fix worked if run with -fix");
4855 out.println(" -summary Print only summary of the tables and status.");
4856 out.println(" -metaonly Only check the state of the hbase:meta table.");
4857 out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4858 out.println(" -boundaries Verify that regions boundaries are the same between META and store files.");
4859 out.println(" -exclusive Abort if another hbck is exclusive or fixing.");
4861 out.println("");
4862 out.println(" Datafile Repair options: (expert features, use with caution!)");
4863 out.println(" -checkCorruptHFiles Check all Hfiles by opening them to make sure they are valid");
4864 out.println(" -sidelineCorruptHFiles Quarantine corrupted HFiles. implies -checkCorruptHFiles");
4866 out.println("");
4867 out.println(" Replication options");
4868 out.println(" -fixReplication Deletes replication queues for removed peers");
4870 out.println("");
4871 out.println(" Metadata Repair options supported as of version 2.0: (expert features, use with caution!)");
4872 out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
4873 out.println(" -fixReferenceFiles Try to offline lingering reference store files");
4874 out.println(" -fixHFileLinks Try to offline lingering HFileLinks");
4875 out.println(" -noHdfsChecking Don't load/check region info from HDFS."
4876 + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4877 out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check");
4879 out.println("");
4880 out.println("NOTE: Following options are NOT supported as of HBase version 2.0+.");
4881 out.println("");
4882 out.println(" UNSUPPORTED Metadata Repair options: (expert features, use with caution!)");
4883 out.println(" -fix Try to fix region assignments. This is for backwards compatiblity");
4884 out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
4885 out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
4886 out.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
4887 out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
4888 out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4889 out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
4890 out.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4891 out.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
4892 out.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4893 out.println(" -fixSplitParents Try to force offline split parents to be online.");
4894 out.println(" -removeParents Try to offline and sideline lingering parents and keep daughter regions.");
4895 out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region"
4896 + " (empty REGIONINFO_QUALIFIER rows)");
4898 out.println("");
4899 out.println(" UNSUPPORTED Metadata Repair shortcuts");
4900 out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4901 "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles" +
4902 "-fixHFileLinks");
4903 out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4904 out.println("");
4905 out.println(" Replication options");
4906 out.println(" -fixReplication Deletes replication queues for removed peers");
4907 out.println(" -cleanReplicationBrarier [tableName] clean the replication barriers " +
4908 "of a specified table, tableName is required");
4909 out.flush();
4910 errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4912 setRetCode(-2);
4913 return this;
4917 * Main program
4919 * @param args
4920 * @throws Exception
4922 public static void main(String[] args) throws Exception {
4923 // create a fsck object
4924 Configuration conf = HBaseConfiguration.create();
4925 Path hbasedir = FSUtils.getRootDir(conf);
4926 URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4927 FSUtils.setFsDefault(conf, new Path(defaultFs));
4928 int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4929 System.exit(ret);
4933 * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4935 static class HBaseFsckTool extends Configured implements Tool {
4936 HBaseFsckTool(Configuration conf) { super(conf); }
4937 @Override
4938 public int run(String[] args) throws Exception {
4939 HBaseFsck hbck = new HBaseFsck(getConf());
4940 hbck.exec(hbck.executor, args);
4941 hbck.close();
4942 return hbck.getRetCode();
4946 public HBaseFsck exec(ExecutorService exec, String[] args)
4947 throws KeeperException, IOException, InterruptedException, ReplicationException {
4948 long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4950 boolean checkCorruptHFiles = false;
4951 boolean sidelineCorruptHFiles = false;
4953 // Process command-line args.
4954 for (int i = 0; i < args.length; i++) {
4955 String cmd = args[i];
4956 if (cmd.equals("-help") || cmd.equals("-h")) {
4957 return printUsageAndExit();
4958 } else if (cmd.equals("-details")) {
4959 setDisplayFullReport();
4960 } else if (cmd.equals("-exclusive")) {
4961 setForceExclusive();
4962 } else if (cmd.equals("-timelag")) {
4963 if (i == args.length - 1) {
4964 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4965 return printUsageAndExit();
4967 try {
4968 long timelag = Long.parseLong(args[++i]);
4969 setTimeLag(timelag);
4970 } catch (NumberFormatException e) {
4971 errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4972 return printUsageAndExit();
4974 } else if (cmd.equals("-sleepBeforeRerun")) {
4975 if (i == args.length - 1) {
4976 errors.reportError(ERROR_CODE.WRONG_USAGE,
4977 "HBaseFsck: -sleepBeforeRerun needs a value.");
4978 return printUsageAndExit();
4980 try {
4981 sleepBeforeRerun = Long.parseLong(args[++i]);
4982 } catch (NumberFormatException e) {
4983 errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4984 return printUsageAndExit();
4986 } else if (cmd.equals("-sidelineDir")) {
4987 if (i == args.length - 1) {
4988 errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4989 return printUsageAndExit();
4991 setSidelineDir(args[++i]);
4992 } else if (cmd.equals("-fix")) {
4993 errors.reportError(ERROR_CODE.WRONG_USAGE,
4994 "This option is deprecated, please use -fixAssignments instead.");
4995 setFixAssignments(true);
4996 } else if (cmd.equals("-fixAssignments")) {
4997 setFixAssignments(true);
4998 } else if (cmd.equals("-fixMeta")) {
4999 setFixMeta(true);
5000 } else if (cmd.equals("-noHdfsChecking")) {
5001 setCheckHdfs(false);
5002 } else if (cmd.equals("-fixHdfsHoles")) {
5003 setFixHdfsHoles(true);
5004 } else if (cmd.equals("-fixHdfsOrphans")) {
5005 setFixHdfsOrphans(true);
5006 } else if (cmd.equals("-fixTableOrphans")) {
5007 setFixTableOrphans(true);
5008 } else if (cmd.equals("-fixHdfsOverlaps")) {
5009 setFixHdfsOverlaps(true);
5010 } else if (cmd.equals("-fixVersionFile")) {
5011 setFixVersionFile(true);
5012 } else if (cmd.equals("-sidelineBigOverlaps")) {
5013 setSidelineBigOverlaps(true);
5014 } else if (cmd.equals("-fixSplitParents")) {
5015 setFixSplitParents(true);
5016 } else if (cmd.equals("-removeParents")) {
5017 setRemoveParents(true);
5018 } else if (cmd.equals("-ignorePreCheckPermission")) {
5019 setIgnorePreCheckPermission(true);
5020 } else if (cmd.equals("-checkCorruptHFiles")) {
5021 checkCorruptHFiles = true;
5022 } else if (cmd.equals("-sidelineCorruptHFiles")) {
5023 sidelineCorruptHFiles = true;
5024 } else if (cmd.equals("-fixReferenceFiles")) {
5025 setFixReferenceFiles(true);
5026 } else if (cmd.equals("-fixHFileLinks")) {
5027 setFixHFileLinks(true);
5028 } else if (cmd.equals("-fixEmptyMetaCells")) {
5029 setFixEmptyMetaCells(true);
5030 } else if (cmd.equals("-repair")) {
5031 // this attempts to merge overlapping hdfs regions, needs testing
5032 // under load
5033 setFixHdfsHoles(true);
5034 setFixHdfsOrphans(true);
5035 setFixMeta(true);
5036 setFixAssignments(true);
5037 setFixHdfsOverlaps(true);
5038 setFixVersionFile(true);
5039 setSidelineBigOverlaps(true);
5040 setFixSplitParents(false);
5041 setCheckHdfs(true);
5042 setFixReferenceFiles(true);
5043 setFixHFileLinks(true);
5044 } else if (cmd.equals("-repairHoles")) {
5045 // this will make all missing hdfs regions available but may lose data
5046 setFixHdfsHoles(true);
5047 setFixHdfsOrphans(false);
5048 setFixMeta(true);
5049 setFixAssignments(true);
5050 setFixHdfsOverlaps(false);
5051 setSidelineBigOverlaps(false);
5052 setFixSplitParents(false);
5053 setCheckHdfs(true);
5054 } else if (cmd.equals("-maxOverlapsToSideline")) {
5055 if (i == args.length - 1) {
5056 errors.reportError(ERROR_CODE.WRONG_USAGE,
5057 "-maxOverlapsToSideline needs a numeric value argument.");
5058 return printUsageAndExit();
5060 try {
5061 int maxOverlapsToSideline = Integer.parseInt(args[++i]);
5062 setMaxOverlapsToSideline(maxOverlapsToSideline);
5063 } catch (NumberFormatException e) {
5064 errors.reportError(ERROR_CODE.WRONG_USAGE,
5065 "-maxOverlapsToSideline needs a numeric value argument.");
5066 return printUsageAndExit();
5068 } else if (cmd.equals("-maxMerge")) {
5069 if (i == args.length - 1) {
5070 errors.reportError(ERROR_CODE.WRONG_USAGE,
5071 "-maxMerge needs a numeric value argument.");
5072 return printUsageAndExit();
5074 try {
5075 int maxMerge = Integer.parseInt(args[++i]);
5076 setMaxMerge(maxMerge);
5077 } catch (NumberFormatException e) {
5078 errors.reportError(ERROR_CODE.WRONG_USAGE,
5079 "-maxMerge needs a numeric value argument.");
5080 return printUsageAndExit();
5082 } else if (cmd.equals("-summary")) {
5083 setSummary();
5084 } else if (cmd.equals("-metaonly")) {
5085 setCheckMetaOnly();
5086 } else if (cmd.equals("-boundaries")) {
5087 setRegionBoundariesCheck();
5088 } else if (cmd.equals("-fixReplication")) {
5089 setFixReplication(true);
5090 } else if (cmd.equals("-cleanReplicationBarrier")) {
5091 setCleanReplicationBarrier(true);
5092 if(args[++i].startsWith("-")){
5093 printUsageAndExit();
5095 setCleanReplicationBarrierTable(args[i]);
5096 } else if (cmd.startsWith("-")) {
5097 errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
5098 return printUsageAndExit();
5099 } else {
5100 includeTable(TableName.valueOf(cmd));
5101 errors.print("Allow checking/fixes for table: " + cmd);
5105 errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
5107 // pre-check current user has FS write permission or not
5108 try {
5109 preCheckPermission();
5110 } catch (AccessDeniedException ace) {
5111 Runtime.getRuntime().exit(-1);
5112 } catch (IOException ioe) {
5113 Runtime.getRuntime().exit(-1);
5116 // do the real work of hbck
5117 connect();
5119 // after connecting to server above, we have server version
5120 // check if unsupported option is specified based on server version
5121 if (!isOptionsSupported(args)) {
5122 return printUsageAndExit();
5125 try {
5126 // if corrupt file mode is on, first fix them since they may be opened later
5127 if (checkCorruptHFiles || sidelineCorruptHFiles) {
5128 LOG.info("Checking all hfiles for corruption");
5129 HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
5130 setHFileCorruptionChecker(hfcc); // so we can get result
5131 Collection<TableName> tables = getIncludedTables();
5132 Collection<Path> tableDirs = new ArrayList<>();
5133 Path rootdir = FSUtils.getRootDir(getConf());
5134 if (tables.size() > 0) {
5135 for (TableName t : tables) {
5136 tableDirs.add(FSUtils.getTableDir(rootdir, t));
5138 } else {
5139 tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
5141 hfcc.checkTables(tableDirs);
5142 hfcc.report(errors);
5145 // check and fix table integrity, region consistency.
5146 int code = onlineHbck();
5147 setRetCode(code);
5148 // If we have changed the HBase state it is better to run hbck again
5149 // to see if we haven't broken something else in the process.
5150 // We run it only once more because otherwise we can easily fall into
5151 // an infinite loop.
5152 if (shouldRerun()) {
5153 try {
5154 LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
5155 Thread.sleep(sleepBeforeRerun);
5156 } catch (InterruptedException ie) {
5157 LOG.warn("Interrupted while sleeping");
5158 return this;
5160 // Just report
5161 setFixAssignments(false);
5162 setFixMeta(false);
5163 setFixHdfsHoles(false);
5164 setFixHdfsOverlaps(false);
5165 setFixVersionFile(false);
5166 setFixTableOrphans(false);
5167 errors.resetErrors();
5168 code = onlineHbck();
5169 setRetCode(code);
5171 } finally {
5172 IOUtils.closeQuietly(this);
5174 return this;
5177 private boolean isOptionsSupported(String[] args) {
5178 boolean result = true;
5179 String hbaseServerVersion = status.getHBaseVersion();
5180 if (VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0) {
5181 // Process command-line args.
5182 for (String arg : args) {
5183 if (unsupportedOptionsInV2.contains(arg)) {
5184 errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
5185 "option '" + arg + "' is not " + "supportted!");
5186 result = false;
5187 break;
5191 return result;
5194 public void setCleanReplicationBarrierTable(String cleanReplicationBarrierTable) {
5195 this.cleanReplicationBarrierTable = TableName.valueOf(cleanReplicationBarrierTable);
5198 public void cleanReplicationBarrier() throws IOException {
5199 if (!cleanReplicationBarrier || cleanReplicationBarrierTable == null) {
5200 return;
5202 if (cleanReplicationBarrierTable.isSystemTable()) {
5203 errors.reportError(ERROR_CODE.INVALID_TABLE,
5204 "invalid table: " + cleanReplicationBarrierTable);
5205 return;
5208 boolean isGlobalScope = false;
5209 try {
5210 isGlobalScope = admin.getDescriptor(cleanReplicationBarrierTable).hasGlobalReplicationScope();
5211 } catch (TableNotFoundException e) {
5212 LOG.info("we may need to clean some erroneous data due to bugs");
5215 if (isGlobalScope) {
5216 errors.reportError(ERROR_CODE.INVALID_TABLE,
5217 "table's replication scope is global: " + cleanReplicationBarrierTable);
5218 return;
5220 List<byte[]> regionNames = new ArrayList<>();
5221 Scan barrierScan = new Scan();
5222 barrierScan.setCaching(100);
5223 barrierScan.addFamily(HConstants.REPLICATION_BARRIER_FAMILY);
5224 barrierScan
5225 .withStartRow(MetaTableAccessor.getTableStartRowForMeta(cleanReplicationBarrierTable,
5226 MetaTableAccessor.QueryType.REGION))
5227 .withStopRow(MetaTableAccessor.getTableStopRowForMeta(cleanReplicationBarrierTable,
5228 MetaTableAccessor.QueryType.REGION));
5229 Result result;
5230 try (ResultScanner scanner = meta.getScanner(barrierScan)) {
5231 while ((result = scanner.next()) != null) {
5232 regionNames.add(result.getRow());
5235 if (regionNames.size() <= 0) {
5236 errors.reportError(ERROR_CODE.INVALID_TABLE,
5237 "there is no barriers of this table: " + cleanReplicationBarrierTable);
5238 return;
5240 ReplicationQueueStorage queueStorage =
5241 ReplicationStorageFactory.getReplicationQueueStorage(zkw, getConf());
5242 List<ReplicationPeerDescription> peerDescriptions = admin.listReplicationPeers();
5243 if (peerDescriptions != null && peerDescriptions.size() > 0) {
5244 List<String> peers = peerDescriptions.stream()
5245 .filter(peerConfig -> ReplicationUtils.contains(peerConfig.getPeerConfig(),
5246 cleanReplicationBarrierTable))
5247 .map(peerConfig -> peerConfig.getPeerId()).collect(Collectors.toList());
5248 try {
5249 List<String> batch = new ArrayList<>();
5250 for (String peer : peers) {
5251 for (byte[] regionName : regionNames) {
5252 batch.add(RegionInfo.encodeRegionName(regionName));
5253 if (batch.size() % 100 == 0) {
5254 queueStorage.removeLastSequenceIds(peer, batch);
5255 batch.clear();
5258 if (batch.size() > 0) {
5259 queueStorage.removeLastSequenceIds(peer, batch);
5260 batch.clear();
5263 } catch (ReplicationException re) {
5264 throw new IOException(re);
5267 for (byte[] regionName : regionNames) {
5268 meta.delete(new Delete(regionName).addFamily(HConstants.REPLICATION_BARRIER_FAMILY));
5270 setShouldRerun();
5274 * ls -r for debugging purposes
5276 void debugLsr(Path p) throws IOException {
5277 debugLsr(getConf(), p, errors);
5281 * ls -r for debugging purposes
5283 public static void debugLsr(Configuration conf,
5284 Path p) throws IOException {
5285 debugLsr(conf, p, new PrintingErrorReporter());
5289 * ls -r for debugging purposes
5291 public static void debugLsr(Configuration conf,
5292 Path p, ErrorReporter errors) throws IOException {
5293 if (!LOG.isDebugEnabled() || p == null) {
5294 return;
5296 FileSystem fs = p.getFileSystem(conf);
5298 if (!fs.exists(p)) {
5299 // nothing
5300 return;
5302 errors.print(p.toString());
5304 if (fs.isFile(p)) {
5305 return;
5308 if (fs.getFileStatus(p).isDirectory()) {
5309 FileStatus[] fss= fs.listStatus(p);
5310 for (FileStatus status : fss) {
5311 debugLsr(conf, status.getPath(), errors);