HBASE-24033 Add ut for loading the corrupt recovered hfiles (#1322)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / regionserver / TestRegionMergeTransactionOnCluster.java
blob7086d9a8c374ccf951b1d4b54d8ad33c50592313
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org.apache.hadoop.hbase.regionserver;
20 import static org.junit.Assert.assertEquals;
21 import static org.junit.Assert.assertFalse;
22 import static org.junit.Assert.assertTrue;
23 import static org.junit.Assert.fail;
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.Objects;
29 import java.util.concurrent.atomic.AtomicBoolean;
30 import org.apache.commons.lang3.RandomUtils;
31 import org.apache.hadoop.conf.Configuration;
32 import org.apache.hadoop.fs.FileSystem;
33 import org.apache.hadoop.fs.Path;
34 import org.apache.hadoop.hbase.HBaseClassTestRule;
35 import org.apache.hadoop.hbase.HBaseTestingUtility;
36 import org.apache.hadoop.hbase.MetaTableAccessor;
37 import org.apache.hadoop.hbase.MiniHBaseCluster;
38 import org.apache.hadoop.hbase.ServerName;
39 import org.apache.hadoop.hbase.StartMiniClusterOption;
40 import org.apache.hadoop.hbase.TableName;
41 import org.apache.hadoop.hbase.UnknownRegionException;
42 import org.apache.hadoop.hbase.client.Admin;
43 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
44 import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
45 import org.apache.hadoop.hbase.client.Put;
46 import org.apache.hadoop.hbase.client.RegionInfo;
47 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
48 import org.apache.hadoop.hbase.client.Result;
49 import org.apache.hadoop.hbase.client.ResultScanner;
50 import org.apache.hadoop.hbase.client.Scan;
51 import org.apache.hadoop.hbase.client.Table;
52 import org.apache.hadoop.hbase.client.TableDescriptor;
53 import org.apache.hadoop.hbase.exceptions.MergeRegionException;
54 import org.apache.hadoop.hbase.master.HMaster;
55 import org.apache.hadoop.hbase.master.MasterRpcServices;
56 import org.apache.hadoop.hbase.master.RegionState;
57 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
58 import org.apache.hadoop.hbase.master.assignment.RegionStates;
59 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
60 import org.apache.hadoop.hbase.testclassification.LargeTests;
61 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
62 import org.apache.hadoop.hbase.util.Bytes;
63 import org.apache.hadoop.hbase.util.FSUtils;
64 import org.apache.hadoop.hbase.util.FutureUtils;
65 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
66 import org.apache.hadoop.hbase.util.Pair;
67 import org.apache.hadoop.hbase.util.PairOfSameType;
68 import org.apache.hadoop.util.StringUtils;
69 import org.apache.zookeeper.KeeperException;
70 import org.junit.AfterClass;
71 import org.junit.BeforeClass;
72 import org.junit.ClassRule;
73 import org.junit.Rule;
74 import org.junit.Test;
75 import org.junit.experimental.categories.Category;
76 import org.junit.rules.TestName;
77 import org.slf4j.Logger;
78 import org.slf4j.LoggerFactory;
80 import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
81 import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
82 import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
84 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
85 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
86 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
88 @Category({RegionServerTests.class, LargeTests.class})
89 public class TestRegionMergeTransactionOnCluster {
91 @ClassRule
92 public static final HBaseClassTestRule CLASS_RULE =
93 HBaseClassTestRule.forClass(TestRegionMergeTransactionOnCluster.class);
95 private static final Logger LOG =
96 LoggerFactory.getLogger(TestRegionMergeTransactionOnCluster.class);
98 @Rule public TestName name = new TestName();
100 private static final int NB_SERVERS = 3;
102 private static final byte[] FAMILYNAME = Bytes.toBytes("fam");
103 private static final byte[] QUALIFIER = Bytes.toBytes("q");
105 private static byte[] ROW = Bytes.toBytes("testRow");
106 private static final int INITIAL_REGION_NUM = 10;
107 private static final int ROWSIZE = 200;
108 private static byte[][] ROWS = makeN(ROW, ROWSIZE);
110 private static int waitTime = 60 * 1000;
112 static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
114 private static HMaster MASTER;
115 private static Admin ADMIN;
117 @BeforeClass
118 public static void beforeAllTests() throws Exception {
119 // Start a cluster
120 StartMiniClusterOption option = StartMiniClusterOption.builder()
121 .masterClass(MyMaster.class).numRegionServers(NB_SERVERS).numDataNodes(NB_SERVERS).build();
122 TEST_UTIL.startMiniCluster(option);
123 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
124 MASTER = cluster.getMaster();
125 MASTER.balanceSwitch(false);
126 ADMIN = TEST_UTIL.getConnection().getAdmin();
129 @AfterClass
130 public static void afterAllTests() throws Exception {
131 TEST_UTIL.shutdownMiniCluster();
132 if (ADMIN != null) ADMIN.close();
135 @Test
136 public void testWholesomeMerge() throws Exception {
137 LOG.info("Starting " + name.getMethodName());
138 final TableName tableName = TableName.valueOf(name.getMethodName());
140 // Create table and load data.
141 Table table = createTableAndLoadData(MASTER, tableName);
142 // Merge 1st and 2nd region
143 mergeRegionsAndVerifyRegionNum(MASTER, tableName, 0, 1,
144 INITIAL_REGION_NUM - 1);
146 // Merge 2nd and 3th region
147 PairOfSameType<RegionInfo> mergedRegions =
148 mergeRegionsAndVerifyRegionNum(MASTER, tableName, 1, 2,
149 INITIAL_REGION_NUM - 2);
151 verifyRowCount(table, ROWSIZE);
153 // Randomly choose one of the two merged regions
154 RegionInfo hri = RandomUtils.nextBoolean() ?
155 mergedRegions.getFirst() : mergedRegions.getSecond();
156 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
157 AssignmentManager am = cluster.getMaster().getAssignmentManager();
158 RegionStates regionStates = am.getRegionStates();
160 // We should not be able to assign it again
161 am.assign(hri);
162 assertFalse("Merged region can't be assigned",
163 regionStates.isRegionInTransition(hri));
165 // We should not be able to unassign it either
166 am.unassign(hri);
167 assertFalse("Merged region can't be unassigned",
168 regionStates.isRegionInTransition(hri));
170 table.close();
174 * Not really restarting the master. Simulate it by clear of new region
175 * state since it is not persisted, will be lost after master restarts.
177 @Test
178 public void testMergeAndRestartingMaster() throws Exception {
179 final TableName tableName = TableName.valueOf(name.getMethodName());
181 // Create table and load data.
182 Table table = createTableAndLoadData(MASTER, tableName);
184 try {
185 MyMasterRpcServices.enabled.set(true);
187 // Merge 1st and 2nd region
188 mergeRegionsAndVerifyRegionNum(MASTER, tableName, 0, 1, INITIAL_REGION_NUM - 1);
189 } finally {
190 MyMasterRpcServices.enabled.set(false);
193 table.close();
196 @Test
197 public void testCleanMergeReference() throws Exception {
198 LOG.info("Starting " + name.getMethodName());
199 ADMIN.catalogJanitorSwitch(false);
200 try {
201 final TableName tableName = TableName.valueOf(name.getMethodName());
202 // Create table and load data.
203 Table table = createTableAndLoadData(MASTER, tableName);
204 // Merge 1st and 2nd region
205 mergeRegionsAndVerifyRegionNum(MASTER, tableName, 0, 1, INITIAL_REGION_NUM - 1);
206 verifyRowCount(table, ROWSIZE);
207 table.close();
209 List<Pair<RegionInfo, ServerName>> tableRegions = MetaTableAccessor
210 .getTableRegionsAndLocations(MASTER.getConnection(), tableName);
211 RegionInfo mergedRegionInfo = tableRegions.get(0).getFirst();
212 TableDescriptor tableDescriptor = MASTER.getTableDescriptors().get(
213 tableName);
214 Result mergedRegionResult = MetaTableAccessor.getRegionResult(
215 MASTER.getConnection(), mergedRegionInfo.getRegionName());
217 // contains merge reference in META
218 assertTrue(MetaTableAccessor.hasMergeRegions(mergedRegionResult.rawCells()));
220 // merging regions' directory are in the file system all the same
221 List<RegionInfo> p = MetaTableAccessor.getMergeRegions(mergedRegionResult.rawCells());
222 RegionInfo regionA = p.get(0);
223 RegionInfo regionB = p.get(1);
224 FileSystem fs = MASTER.getMasterFileSystem().getFileSystem();
225 Path rootDir = MASTER.getMasterFileSystem().getRootDir();
227 Path tabledir = FSUtils.getTableDir(rootDir, mergedRegionInfo.getTable());
228 Path regionAdir = new Path(tabledir, regionA.getEncodedName());
229 Path regionBdir = new Path(tabledir, regionB.getEncodedName());
230 assertTrue(fs.exists(regionAdir));
231 assertTrue(fs.exists(regionBdir));
233 ColumnFamilyDescriptor[] columnFamilies = tableDescriptor.getColumnFamilies();
234 HRegionFileSystem hrfs = new HRegionFileSystem(
235 TEST_UTIL.getConfiguration(), fs, tabledir, mergedRegionInfo);
236 int count = 0;
237 for(ColumnFamilyDescriptor colFamily : columnFamilies) {
238 count += hrfs.getStoreFiles(colFamily.getName()).size();
240 ADMIN.compactRegion(mergedRegionInfo.getRegionName());
241 // clean up the merged region store files
242 // wait until merged region have reference file
243 long timeout = System.currentTimeMillis() + waitTime;
244 int newcount = 0;
245 while (System.currentTimeMillis() < timeout) {
246 for(ColumnFamilyDescriptor colFamily : columnFamilies) {
247 newcount += hrfs.getStoreFiles(colFamily.getName()).size();
249 if(newcount > count) {
250 break;
252 Thread.sleep(50);
254 assertTrue(newcount > count);
255 List<RegionServerThread> regionServerThreads = TEST_UTIL.getHBaseCluster()
256 .getRegionServerThreads();
257 for (RegionServerThread rs : regionServerThreads) {
258 CompactedHFilesDischarger cleaner = new CompactedHFilesDischarger(100, null,
259 rs.getRegionServer(), false);
260 cleaner.chore();
261 Thread.sleep(1000);
263 while (System.currentTimeMillis() < timeout) {
264 int newcount1 = 0;
265 for(ColumnFamilyDescriptor colFamily : columnFamilies) {
266 newcount1 += hrfs.getStoreFiles(colFamily.getName()).size();
268 if(newcount1 <= 1) {
269 break;
271 Thread.sleep(50);
273 // run CatalogJanitor to clean merge references in hbase:meta and archive the
274 // files of merging regions
275 int cleaned = 0;
276 while (cleaned == 0) {
277 cleaned = ADMIN.runCatalogJanitor();
278 LOG.debug("catalog janitor returned " + cleaned);
279 Thread.sleep(50);
280 // Cleanup is async so wait till all procedures are done running.
281 ProcedureTestingUtility.waitNoProcedureRunning(
282 TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor());
284 assertFalse(regionAdir.toString(), fs.exists(regionAdir));
285 assertFalse(regionBdir.toString(), fs.exists(regionBdir));
286 assertTrue(cleaned > 0);
288 mergedRegionResult = MetaTableAccessor.getRegionResult(
289 TEST_UTIL.getConnection(), mergedRegionInfo.getRegionName());
290 assertFalse(MetaTableAccessor.hasMergeRegions(mergedRegionResult.rawCells()));
291 } finally {
292 ADMIN.catalogJanitorSwitch(true);
297 * This test tests 1, merging region not online;
298 * 2, merging same two regions; 3, merging unknown regions.
299 * They are in one test case so that we don't have to create
300 * many tables, and these tests are simple.
302 @Test
303 public void testMerge() throws Exception {
304 LOG.info("Starting " + name.getMethodName());
305 final TableName tableName = TableName.valueOf(name.getMethodName());
306 final Admin admin = TEST_UTIL.getAdmin();
308 try {
309 // Create table and load data.
310 Table table = createTableAndLoadData(MASTER, tableName);
311 AssignmentManager am = MASTER.getAssignmentManager();
312 List<RegionInfo> regions = am.getRegionStates().getRegionsOfTable(tableName);
313 // Fake offline one region
314 RegionInfo a = regions.get(0);
315 RegionInfo b = regions.get(1);
316 am.unassign(b);
317 am.offlineRegion(b);
318 try {
319 // Merge offline region. Region a is offline here
320 FutureUtils.get(
321 admin.mergeRegionsAsync(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false));
322 fail("Offline regions should not be able to merge");
323 } catch (DoNotRetryRegionException ie) {
324 System.out.println(ie);
325 assertTrue(ie instanceof MergeRegionException);
328 try {
329 // Merge the same region: b and b.
330 FutureUtils
331 .get(admin.mergeRegionsAsync(b.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), true));
332 fail("A region should not be able to merge with itself, even forcfully");
333 } catch (IOException ie) {
334 assertTrue("Exception should mention regions not online",
335 StringUtils.stringifyException(ie).contains("region to itself") &&
336 ie instanceof MergeRegionException);
339 try {
340 // Merge unknown regions
341 FutureUtils.get(admin.mergeRegionsAsync(Bytes.toBytes("-f1"), Bytes.toBytes("-f2"), true));
342 fail("Unknown region could not be merged");
343 } catch (IOException ie) {
344 assertTrue("UnknownRegionException should be thrown", ie instanceof UnknownRegionException);
346 table.close();
347 } finally {
348 TEST_UTIL.deleteTable(tableName);
352 @Test
353 public void testMergeWithReplicas() throws Exception {
354 final TableName tableName = TableName.valueOf(name.getMethodName());
355 // Create table and load data.
356 createTableAndLoadData(MASTER, tableName, 5, 2);
357 List<Pair<RegionInfo, ServerName>> initialRegionToServers =
358 MetaTableAccessor.getTableRegionsAndLocations(
359 TEST_UTIL.getConnection(), tableName);
360 // Merge 1st and 2nd region
361 PairOfSameType<RegionInfo> mergedRegions = mergeRegionsAndVerifyRegionNum(MASTER, tableName,
362 0, 2, 5 * 2 - 2);
363 List<Pair<RegionInfo, ServerName>> currentRegionToServers =
364 MetaTableAccessor.getTableRegionsAndLocations(
365 TEST_UTIL.getConnection(), tableName);
366 List<RegionInfo> initialRegions = new ArrayList<>();
367 for (Pair<RegionInfo, ServerName> p : initialRegionToServers) {
368 initialRegions.add(p.getFirst());
370 List<RegionInfo> currentRegions = new ArrayList<>();
371 for (Pair<RegionInfo, ServerName> p : currentRegionToServers) {
372 currentRegions.add(p.getFirst());
374 assertTrue(initialRegions.contains(mergedRegions.getFirst())); //this is the first region
375 assertTrue(initialRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
376 mergedRegions.getFirst(), 1))); //this is the replica of the first region
377 assertTrue(initialRegions.contains(mergedRegions.getSecond())); //this is the second region
378 assertTrue(initialRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
379 mergedRegions.getSecond(), 1))); //this is the replica of the second region
380 assertTrue(!initialRegions.contains(currentRegions.get(0))); //this is the new region
381 assertTrue(!initialRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
382 currentRegions.get(0), 1))); //replica of the new region
383 assertTrue(currentRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
384 currentRegions.get(0), 1))); //replica of the new region
385 assertTrue(!currentRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
386 mergedRegions.getFirst(), 1))); //replica of the merged region
387 assertTrue(!currentRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
388 mergedRegions.getSecond(), 1))); //replica of the merged region
391 private PairOfSameType<RegionInfo> mergeRegionsAndVerifyRegionNum(
392 HMaster master, TableName tablename,
393 int regionAnum, int regionBnum, int expectedRegionNum) throws Exception {
394 PairOfSameType<RegionInfo> mergedRegions =
395 requestMergeRegion(master, tablename, regionAnum, regionBnum);
396 waitAndVerifyRegionNum(master, tablename, expectedRegionNum);
397 return mergedRegions;
400 private PairOfSameType<RegionInfo> requestMergeRegion(
401 HMaster master, TableName tablename,
402 int regionAnum, int regionBnum) throws Exception {
403 List<Pair<RegionInfo, ServerName>> tableRegions = MetaTableAccessor
404 .getTableRegionsAndLocations(
405 TEST_UTIL.getConnection(), tablename);
406 RegionInfo regionA = tableRegions.get(regionAnum).getFirst();
407 RegionInfo regionB = tableRegions.get(regionBnum).getFirst();
408 ADMIN.mergeRegionsAsync(
409 regionA.getEncodedNameAsBytes(),
410 regionB.getEncodedNameAsBytes(), false);
411 return new PairOfSameType<>(regionA, regionB);
414 private void waitAndVerifyRegionNum(HMaster master, TableName tablename,
415 int expectedRegionNum) throws Exception {
416 List<Pair<RegionInfo, ServerName>> tableRegionsInMeta;
417 List<RegionInfo> tableRegionsInMaster;
418 long timeout = System.currentTimeMillis() + waitTime;
419 while (System.currentTimeMillis() < timeout) {
420 tableRegionsInMeta =
421 MetaTableAccessor.getTableRegionsAndLocations(TEST_UTIL.getConnection(), tablename);
422 tableRegionsInMaster =
423 master.getAssignmentManager().getRegionStates().getRegionsOfTable(tablename);
424 LOG.info(Objects.toString(tableRegionsInMaster));
425 LOG.info(Objects.toString(tableRegionsInMeta));
426 int tableRegionsInMetaSize = tableRegionsInMeta.size();
427 int tableRegionsInMasterSize = tableRegionsInMaster.size();
428 if (tableRegionsInMetaSize == expectedRegionNum
429 && tableRegionsInMasterSize == expectedRegionNum) {
430 break;
432 Thread.sleep(250);
435 tableRegionsInMeta = MetaTableAccessor.getTableRegionsAndLocations(
436 TEST_UTIL.getConnection(), tablename);
437 LOG.info("Regions after merge:" + Joiner.on(',').join(tableRegionsInMeta));
438 assertEquals(expectedRegionNum, tableRegionsInMeta.size());
441 private Table createTableAndLoadData(HMaster master, TableName tablename)
442 throws Exception {
443 return createTableAndLoadData(master, tablename, INITIAL_REGION_NUM, 1);
446 private Table createTableAndLoadData(HMaster master, TableName tablename,
447 int numRegions, int replication) throws Exception {
448 assertTrue("ROWSIZE must > numregions:" + numRegions, ROWSIZE > numRegions);
449 byte[][] splitRows = new byte[numRegions - 1][];
450 for (int i = 0; i < splitRows.length; i++) {
451 splitRows[i] = ROWS[(i + 1) * ROWSIZE / numRegions];
454 Table table = TEST_UTIL.createTable(tablename, FAMILYNAME, splitRows);
455 LOG.info("Created " + table.getName());
456 if (replication > 1) {
457 HBaseTestingUtility.setReplicas(ADMIN, tablename, replication);
458 LOG.info("Set replication of " + replication + " on " + table.getName());
460 loadData(table);
461 LOG.info("Loaded " + table.getName());
462 verifyRowCount(table, ROWSIZE);
463 LOG.info("Verified " + table.getName());
465 List<Pair<RegionInfo, ServerName>> tableRegions;
466 TEST_UTIL.waitUntilAllRegionsAssigned(tablename);
467 LOG.info("All regions assigned for table - " + table.getName());
468 tableRegions = MetaTableAccessor.getTableRegionsAndLocations(
469 TEST_UTIL.getConnection(), tablename);
470 assertEquals("Wrong number of regions in table " + tablename,
471 numRegions * replication, tableRegions.size());
472 LOG.info(tableRegions.size() + "Regions after load: " + Joiner.on(',').join(tableRegions));
473 assertEquals(numRegions * replication, tableRegions.size());
474 return table;
477 private static byte[][] makeN(byte[] base, int n) {
478 byte[][] ret = new byte[n][];
479 for (int i = 0; i < n; i++) {
480 ret[i] = Bytes.add(base, Bytes.toBytes(String.format("%04d", i)));
482 return ret;
485 private void loadData(Table table) throws IOException {
486 for (int i = 0; i < ROWSIZE; i++) {
487 Put put = new Put(ROWS[i]);
488 put.addColumn(FAMILYNAME, QUALIFIER, Bytes.toBytes(i));
489 table.put(put);
493 private void verifyRowCount(Table table, int expectedRegionNum)
494 throws IOException {
495 ResultScanner scanner = table.getScanner(new Scan());
496 int rowCount = 0;
497 while (scanner.next() != null) {
498 rowCount++;
500 assertEquals(expectedRegionNum, rowCount);
501 scanner.close();
504 // Make it public so that JVMClusterUtil can access it.
505 public static class MyMaster extends HMaster {
506 public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException {
507 super(conf);
510 @Override
511 protected RSRpcServices createRpcServices() throws IOException {
512 return new MyMasterRpcServices(this);
516 static class MyMasterRpcServices extends MasterRpcServices {
517 static AtomicBoolean enabled = new AtomicBoolean(false);
519 private HMaster myMaster;
520 public MyMasterRpcServices(HMaster master) throws IOException {
521 super(master);
522 myMaster = master;
525 @Override
526 public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c,
527 ReportRegionStateTransitionRequest req) throws ServiceException {
528 ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req);
529 if (enabled.get() && req.getTransition(0).getTransitionCode()
530 == TransitionCode.READY_TO_MERGE && !resp.hasErrorMessage()) {
531 RegionStates regionStates = myMaster.getAssignmentManager().getRegionStates();
532 for (RegionState regionState: regionStates.getRegionsStateInTransition()) {
533 // Find the merging_new region and remove it
534 if (regionState.isMergingNew()) {
535 regionStates.deleteRegion(regionState.getRegion());
539 return resp;