HBASE-18823 Apply RegionInfo to MasterObserver/RegionObserver/WALObserver
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / util / BaseTestHBaseFsck.java
blob58ddc2d69dd8796d5e2364d50ebf6cef6b720c28
1 /**
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
19 package org.apache.hadoop.hbase.util;
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.EnumSet;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.UUID;
29 import java.util.concurrent.CountDownLatch;
30 import java.util.concurrent.ExecutorService;
31 import java.util.concurrent.ScheduledThreadPoolExecutor;
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FileStatus;
37 import org.apache.hadoop.fs.FileSystem;
38 import org.apache.hadoop.fs.Path;
39 import org.apache.hadoop.hbase.ClusterStatus;
40 import org.apache.hadoop.hbase.ClusterStatus.Option;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.HConstants;
44 import org.apache.hadoop.hbase.HRegionInfo;
45 import org.apache.hadoop.hbase.HRegionLocation;
46 import org.apache.hadoop.hbase.HTableDescriptor;
47 import org.apache.hadoop.hbase.MetaTableAccessor;
48 import org.apache.hadoop.hbase.ServerName;
49 import org.apache.hadoop.hbase.TableName;
50 import org.apache.hadoop.hbase.client.Admin;
51 import org.apache.hadoop.hbase.client.ClusterConnection;
52 import org.apache.hadoop.hbase.client.Connection;
53 import org.apache.hadoop.hbase.client.ConnectionFactory;
54 import org.apache.hadoop.hbase.client.Delete;
55 import org.apache.hadoop.hbase.client.Put;
56 import org.apache.hadoop.hbase.client.RegionInfo;
57 import org.apache.hadoop.hbase.client.RegionLocator;
58 import org.apache.hadoop.hbase.client.Scan;
59 import org.apache.hadoop.hbase.client.Table;
60 import org.apache.hadoop.hbase.client.TableDescriptor;
61 import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
62 import org.apache.hadoop.hbase.coprocessor.MasterObserver;
63 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
64 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
65 import org.apache.hadoop.hbase.master.assignment.RegionStates;
66 import org.apache.hadoop.hbase.mob.MobFileName;
67 import org.apache.hadoop.hbase.mob.MobUtils;
68 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
69 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
70 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos;
71 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
72 import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
73 import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
74 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
75 import org.apache.zookeeper.KeeperException;
76 import org.junit.rules.TestName;
78 import static org.junit.Assert.assertEquals;
79 import static org.junit.Assert.fail;
/**
 * This is the base class for HBaseFsck's ability to detect reasons for inconsistent tables.
 *
 * The actual tests are in:
 * TestHBaseFsckTwoRS
 * TestHBaseFsckOneRS
 * TestHBaseFsckMOB
 * TestHBaseFsckReplicas
 */
90 public class BaseTestHBaseFsck {
91 static final int POOL_SIZE = 7;
92 protected static final Log LOG = LogFactory.getLog(BaseTestHBaseFsck.class);
93 protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
94 protected final static Configuration conf = TEST_UTIL.getConfiguration();
95 protected final static String FAM_STR = "fam";
96 protected final static byte[] FAM = Bytes.toBytes(FAM_STR);
97 protected final static int REGION_ONLINE_TIMEOUT = 800;
98 protected static AssignmentManager assignmentManager;
99 protected static RegionStates regionStates;
100 protected static ExecutorService tableExecutorService;
101 protected static ScheduledThreadPoolExecutor hbfsckExecutorService;
102 protected static ClusterConnection connection;
103 protected static Admin admin;
105 // for the instance, reset every test run
106 protected Table tbl;
107 protected final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
108 Bytes.toBytes("B"), Bytes.toBytes("C") };
109 // one row per region.
110 protected final static byte[][] ROWKEYS= new byte[][] {
111 Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
112 Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
116 * Create a new region in META.
118 protected HRegionInfo createRegion(final HTableDescriptor
119 htd, byte[] startKey, byte[] endKey)
120 throws IOException {
121 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
122 HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
123 MetaTableAccessor.addRegionToMeta(meta, hri);
124 meta.close();
125 return hri;
129 * Debugging method to dump the contents of meta.
131 protected void dumpMeta(TableName tableName) throws IOException {
132 List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
133 for (byte[] row : metaRows) {
134 LOG.info(Bytes.toString(row));
139 * This method is used to undeploy a region -- close it and attempt to
140 * remove its state from the Master.
142 protected void undeployRegion(Connection conn, ServerName sn,
143 HRegionInfo hri) throws IOException, InterruptedException {
144 try {
145 HBaseFsckRepair.closeRegionSilentlyAndWait(conn, sn, hri);
146 if (!hri.isMetaTable()) {
147 admin.offline(hri.getRegionName());
149 } catch (IOException ioe) {
150 LOG.warn("Got exception when attempting to offline region "
151 + Bytes.toString(hri.getRegionName()), ioe);
155 * Delete a region from assignments, meta, or completely from hdfs.
156 * @param unassign if true unassign region if assigned
157 * @param metaRow if true remove region's row from META
158 * @param hdfs if true remove region's dir in HDFS
160 protected void deleteRegion(Configuration conf, final HTableDescriptor htd,
161 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
162 boolean hdfs) throws IOException, InterruptedException {
163 deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false,
164 HRegionInfo.DEFAULT_REPLICA_ID);
168 * Delete a region from assignments, meta, or completely from hdfs.
169 * @param unassign if true unassign region if assigned
170 * @param metaRow if true remove region's row from META
171 * @param hdfs if true remove region's dir in HDFS
172 * @param regionInfoOnly if true remove a region dir's .regioninfo file
173 * @param replicaId replica id
175 protected void deleteRegion(Configuration conf, final HTableDescriptor htd,
176 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
177 boolean hdfs, boolean regionInfoOnly, int replicaId)
178 throws IOException, InterruptedException {
179 LOG.info("** Before delete:");
180 dumpMeta(htd.getTableName());
182 List<HRegionLocation> locations;
183 try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
184 locations = rl.getAllRegionLocations();
187 for (HRegionLocation location : locations) {
188 HRegionInfo hri = location.getRegionInfo();
189 ServerName hsa = location.getServerName();
190 if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
191 && Bytes.compareTo(hri.getEndKey(), endKey) == 0
192 && hri.getReplicaId() == replicaId) {
194 LOG.info("RegionName: " +hri.getRegionNameAsString());
195 byte[] deleteRow = hri.getRegionName();
197 if (unassign) {
198 LOG.info("Undeploying region " + hri + " from server " + hsa);
199 undeployRegion(connection, hsa, hri);
202 if (regionInfoOnly) {
203 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
204 Path rootDir = FSUtils.getRootDir(conf);
205 FileSystem fs = rootDir.getFileSystem(conf);
206 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
207 hri.getEncodedName());
208 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
209 fs.delete(hriPath, true);
212 if (hdfs) {
213 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
214 Path rootDir = FSUtils.getRootDir(conf);
215 FileSystem fs = rootDir.getFileSystem(conf);
216 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
217 hri.getEncodedName());
218 HBaseFsck.debugLsr(conf, p);
219 boolean success = fs.delete(p, true);
220 LOG.info("Deleted " + p + " sucessfully? " + success);
221 HBaseFsck.debugLsr(conf, p);
224 if (metaRow) {
225 try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
226 Delete delete = new Delete(deleteRow);
227 meta.delete(delete);
231 LOG.info(hri.toString() + hsa.toString());
234 TEST_UTIL.getMetaTableRows(htd.getTableName());
235 LOG.info("*** After delete:");
236 dumpMeta(htd.getTableName());
240 * Setup a clean table before we start mucking with it.
242 * It will set tbl which needs to be closed after test
244 * @throws IOException
245 * @throws InterruptedException
246 * @throws KeeperException
248 void setupTable(TableName tablename) throws Exception {
249 setupTableWithRegionReplica(tablename, 1);
253 * Setup a clean table with a certain region_replica count
255 * It will set tbl which needs to be closed after test
257 * @throws Exception
259 void setupTableWithRegionReplica(TableName tablename, int replicaCount) throws Exception {
260 HTableDescriptor desc = new HTableDescriptor(tablename);
261 desc.setRegionReplication(replicaCount);
262 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
263 desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
264 createTable(TEST_UTIL, desc, SPLITS);
266 tbl = connection.getTable(tablename, tableExecutorService);
267 List<Put> puts = new ArrayList<>(ROWKEYS.length);
268 for (byte[] row : ROWKEYS) {
269 Put p = new Put(row);
270 p.addColumn(FAM, Bytes.toBytes("val"), row);
271 puts.add(p);
273 tbl.put(puts);
277 * Setup a clean table with a mob-enabled column.
279 * @param tablename The name of a table to be created.
280 * @throws Exception
282 void setupMobTable(TableName tablename) throws Exception {
283 HTableDescriptor desc = new HTableDescriptor(tablename);
284 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
285 hcd.setMobEnabled(true);
286 hcd.setMobThreshold(0);
287 desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
288 createTable(TEST_UTIL, desc, SPLITS);
290 tbl = connection.getTable(tablename, tableExecutorService);
291 List<Put> puts = new ArrayList<>(ROWKEYS.length);
292 for (byte[] row : ROWKEYS) {
293 Put p = new Put(row);
294 p.addColumn(FAM, Bytes.toBytes("val"), row);
295 puts.add(p);
297 tbl.put(puts);
301 * Counts the number of rows to verify data loss or non-dataloss.
303 int countRows() throws IOException {
304 return TEST_UTIL.countRows(tbl);
308 * Counts the number of rows to verify data loss or non-dataloss.
310 int countRows(byte[] start, byte[] end) throws IOException {
311 return TEST_UTIL.countRows(tbl, new Scan(start, end));
315 * delete table in preparation for next test
317 * @param tablename
318 * @throws IOException
320 void cleanupTable(TableName tablename) throws Exception {
321 if (tbl != null) {
322 tbl.close();
323 tbl = null;
326 ((ClusterConnection) connection).clearRegionCache();
327 deleteTable(TEST_UTIL, tablename);
331 * Get region info from local cluster.
333 Map<ServerName, List<String>> getDeployedHRIs(final Admin admin) throws IOException {
334 ClusterStatus status = admin.getClusterStatus(EnumSet.of(Option.LIVE_SERVERS));
335 Collection<ServerName> regionServers = status.getServers();
336 Map<ServerName, List<String>> mm = new HashMap<>();
337 for (ServerName hsi : regionServers) {
338 AdminProtos.AdminService.BlockingInterface server = connection.getAdmin(hsi);
340 // list all online regions from this region server
341 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
342 List<String> regionNames = new ArrayList<>(regions.size());
343 for (HRegionInfo hri : regions) {
344 regionNames.add(hri.getRegionNameAsString());
346 mm.put(hsi, regionNames);
348 return mm;
352 * Returns the HSI a region info is on.
354 ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
355 for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
356 if (e.getValue().contains(hri.getRegionNameAsString())) {
357 return e.getKey();
360 return null;
363 public void deleteTableDir(TableName table) throws IOException {
364 Path rootDir = FSUtils.getRootDir(conf);
365 FileSystem fs = rootDir.getFileSystem(conf);
366 Path p = FSUtils.getTableDir(rootDir, table);
367 HBaseFsck.debugLsr(conf, p);
368 boolean success = fs.delete(p, true);
369 LOG.info("Deleted " + p + " sucessfully? " + success);
373 * We don't have an easy way to verify that a flush completed, so we loop until we find a
374 * legitimate hfile and return it.
375 * @param fs
376 * @param table
377 * @return Path of a flushed hfile.
378 * @throws IOException
380 Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
381 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
382 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
383 Path famDir = new Path(regionDir, FAM_STR);
385 // keep doing this until we get a legit hfile
386 while (true) {
387 FileStatus[] hfFss = fs.listStatus(famDir);
388 if (hfFss.length == 0) {
389 continue;
391 for (FileStatus hfs : hfFss) {
392 if (!hfs.isDirectory()) {
393 return hfs.getPath();
400 * Gets flushed mob files.
401 * @param fs The current file system.
402 * @param table The current table name.
403 * @return Path of a flushed hfile.
404 * @throws IOException
406 Path getFlushedMobFile(FileSystem fs, TableName table) throws IOException {
407 Path famDir = MobUtils.getMobFamilyPath(conf, table, FAM_STR);
409 // keep doing this until we get a legit hfile
410 while (true) {
411 FileStatus[] hfFss = fs.listStatus(famDir);
412 if (hfFss.length == 0) {
413 continue;
415 for (FileStatus hfs : hfFss) {
416 if (!hfs.isDirectory()) {
417 return hfs.getPath();
424 * Creates a new mob file name by the old one.
425 * @param oldFileName The old mob file name.
426 * @return The new mob file name.
428 String createMobFileName(String oldFileName) {
429 MobFileName mobFileName = MobFileName.create(oldFileName);
430 String startKey = mobFileName.getStartKey();
431 String date = mobFileName.getDate();
432 return MobFileName.create(startKey, date, UUID.randomUUID().toString().replaceAll("-", ""))
433 .getFileName();
440 * Test that use this should have a timeout, because this method could potentially wait forever.
442 protected void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
443 int corrupt, int fail, int quar, int missing) throws Exception {
444 try {
445 setupTable(table);
446 assertEquals(ROWKEYS.length, countRows());
447 admin.flush(table); // flush is async.
449 // Mess it up by leaving a hole in the assignment, meta, and hdfs data
450 admin.disableTable(table);
452 String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
453 table.getNameAsString()};
454 HBaseFsck res = hbck.exec(hbfsckExecutorService, args);
456 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
457 assertEquals(hfcc.getHFilesChecked(), check);
458 assertEquals(hfcc.getCorrupted().size(), corrupt);
459 assertEquals(hfcc.getFailures().size(), fail);
460 assertEquals(hfcc.getQuarantined().size(), quar);
461 assertEquals(hfcc.getMissing().size(), missing);
463 // its been fixed, verify that we can enable
464 admin.enableTableAsync(table);
465 while (!admin.isTableEnabled(table)) {
466 try {
467 Thread.sleep(250);
468 } catch (InterruptedException e) {
469 e.printStackTrace();
470 fail("Interrupted when trying to enable table " + table);
473 } finally {
474 cleanupTable(table);
479 static class MockErrorReporter implements ErrorReporter {
480 static int calledCount = 0;
482 @Override
483 public void clear() {
484 calledCount++;
487 @Override
488 public void report(String message) {
489 calledCount++;
492 @Override
493 public void reportError(String message) {
494 calledCount++;
497 @Override
498 public void reportError(ERROR_CODE errorCode, String message) {
499 calledCount++;
502 @Override
503 public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
504 calledCount++;
507 @Override
508 public void reportError(ERROR_CODE errorCode,
509 String message, TableInfo table, HbckInfo info) {
510 calledCount++;
513 @Override
514 public void reportError(ERROR_CODE errorCode, String message,
515 TableInfo table, HbckInfo info1, HbckInfo info2) {
516 calledCount++;
519 @Override
520 public int summarize() {
521 return ++calledCount;
524 @Override
525 public void detail(String details) {
526 calledCount++;
529 @Override
530 public ArrayList<ERROR_CODE> getErrorList() {
531 calledCount++;
532 return new ArrayList<>();
535 @Override
536 public void progress() {
537 calledCount++;
540 @Override
541 public void print(String message) {
542 calledCount++;
545 @Override
546 public void resetErrors() {
547 calledCount++;
550 @Override
551 public boolean tableHasErrors(TableInfo table) {
552 calledCount++;
553 return false;
558 protected void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
559 boolean regionInfoOnly) throws IOException, InterruptedException {
560 HRegionLocation metaLocation = connection.getRegionLocator(TableName.META_TABLE_NAME)
561 .getRegionLocation(HConstants.EMPTY_START_ROW);
562 ServerName hsa = metaLocation.getServerName();
563 HRegionInfo hri = metaLocation.getRegionInfo();
564 if (unassign) {
565 LOG.info("Undeploying meta region " + hri + " from server " + hsa);
566 try (Connection unmanagedConnection = ConnectionFactory.createConnection(conf)) {
567 undeployRegion(unmanagedConnection, hsa, hri);
571 if (regionInfoOnly) {
572 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
573 Path rootDir = FSUtils.getRootDir(conf);
574 FileSystem fs = rootDir.getFileSystem(conf);
575 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
576 hri.getEncodedName());
577 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
578 fs.delete(hriPath, true);
581 if (hdfs) {
582 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
583 Path rootDir = FSUtils.getRootDir(conf);
584 FileSystem fs = rootDir.getFileSystem(conf);
585 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
586 hri.getEncodedName());
587 HBaseFsck.debugLsr(conf, p);
588 boolean success = fs.delete(p, true);
589 LOG.info("Deleted " + p + " sucessfully? " + success);
590 HBaseFsck.debugLsr(conf, p);
594 @org.junit.Rule
595 public TestName name = new TestName();
597 public static class MasterSyncObserver implements MasterObserver {
598 volatile CountDownLatch tableCreationLatch = null;
599 volatile CountDownLatch tableDeletionLatch = null;
601 @Override
602 public void postCompletedCreateTableAction(
603 final ObserverContext<MasterCoprocessorEnvironment> ctx,
604 final TableDescriptor desc,
605 final RegionInfo[] regions) throws IOException {
606 // the AccessController test, some times calls only and directly the
607 // postCompletedCreateTableAction()
608 if (tableCreationLatch != null) {
609 tableCreationLatch.countDown();
613 @Override
614 public void postCompletedDeleteTableAction(
615 final ObserverContext<MasterCoprocessorEnvironment> ctx,
616 final TableName tableName) throws IOException {
617 // the AccessController test, some times calls only and directly the
618 // postCompletedDeleteTableAction()
619 if (tableDeletionLatch != null) {
620 tableDeletionLatch.countDown();
625 public static void createTable(HBaseTestingUtility testUtil, HTableDescriptor htd,
626 byte [][] splitKeys) throws Exception {
627 // NOTE: We need a latch because admin is not sync,
628 // so the postOp coprocessor method may be called after the admin operation returned.
629 MasterSyncObserver observer = (MasterSyncObserver)testUtil.getHBaseCluster().getMaster()
630 .getMasterCoprocessorHost().findCoprocessor(MasterSyncObserver.class.getName());
631 observer.tableCreationLatch = new CountDownLatch(1);
632 if (splitKeys != null) {
633 admin.createTable(htd, splitKeys);
634 } else {
635 admin.createTable(htd);
637 observer.tableCreationLatch.await();
638 observer.tableCreationLatch = null;
639 testUtil.waitUntilAllRegionsAssigned(htd.getTableName());
642 public static void deleteTable(HBaseTestingUtility testUtil, TableName tableName)
643 throws Exception {
644 // NOTE: We need a latch because admin is not sync,
645 // so the postOp coprocessor method may be called after the admin operation returned.
646 MasterSyncObserver observer = (MasterSyncObserver)testUtil.getHBaseCluster().getMaster()
647 .getMasterCoprocessorHost().findCoprocessor(MasterSyncObserver.class.getName());
648 observer.tableDeletionLatch = new CountDownLatch(1);
649 try {
650 admin.disableTable(tableName);
651 } catch (Exception e) {
652 LOG.debug("Table: " + tableName + " already disabled, so just deleting it.");
654 admin.deleteTable(tableName);
655 observer.tableDeletionLatch.await();
656 observer.tableDeletionLatch = null;