HBASE-26700 The way we bypass broken track file is not enough in StoreFileListFile...
[hbase.git] hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
blob 9934417ed1abcdb65c6b85ed82436ab3fcf306c1
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CatalogFamilyFormat;
import org.apache.hadoop.hbase.ChoreService;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.StoppableImplementation;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Iterators;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
@Category(LargeTests.class)
public class TestEndToEndSplitTransaction {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestEndToEndSplitTransaction.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestEndToEndSplitTransaction.class);
  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  private static final Configuration CONF = TEST_UTIL.getConfiguration();

  @Rule
  public TestName name = new TestName();

  @BeforeClass
  public static void beforeAllTests() throws Exception {
    TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
    TEST_UTIL.startMiniCluster(1);
  }

  @AfterClass
  public static void afterAllTests() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }
  /**
   * This is the test for HBASE-20940. It splits a region and then opens a reference over a store
   * file. As long as a store file has an open reference, the test verifies that the region cannot
   * be split again.
   */
  @Test
  public void testCanSplitJustAfterASplit() throws Exception {
    LOG.info("Starting testCanSplitJustAfterASplit");
    byte[] fam = Bytes.toBytes("cf_split");

    CompactSplit compactSplit =
      TEST_UTIL.getMiniHBaseCluster().getRegionServer(0).getCompactSplitThread();
    TableName tableName = TableName.valueOf("CanSplitTable");
    Table source = TEST_UTIL.getConnection().getTable(tableName);
    Admin admin = TEST_UTIL.getAdmin();
    // set a large min compaction file count to avoid compaction just after splitting.
    TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(fam)).build();
    Map<String, StoreFileReader> scanner = Maps.newHashMap();

    try {
      admin.createTable(htd);
      TEST_UTIL.loadTable(source, fam);
      compactSplit.setCompactionsEnabled(false);
      admin.split(tableName);
      TEST_UTIL.waitFor(60000, () -> TEST_UTIL.getHBaseCluster().getRegions(tableName).size() == 2);
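
      // After the split, each daughter's store file is a reference back to the parent's file.
      // Opening a scanner on such a reference file keeps its reader open, which is what should
      // prevent the daughter region from being split again below.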
      List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
      regions.stream()
        .forEach(r -> r.getStores().get(0).getStorefiles().stream()
          .filter(s -> s.isReference() && !scanner.containsKey(r.getRegionInfo().getEncodedName()))
          .forEach(sf -> {
            StoreFileReader reader = ((HStoreFile) sf).getReader();
            reader.getStoreFileScanner(true, false, false, 0, 0, false);
            scanner.put(r.getRegionInfo().getEncodedName(), reader);
            LOG.info("Got reference to file = " + sf.getPath() + ", for region = " +
              r.getRegionInfo().getEncodedName());
          }));
      assertTrue("Regions did not split properly", regions.size() > 1);
      assertTrue("Could not get a reference to any of the store files", scanner.size() > 1);
      compactSplit.setCompactionsEnabled(true);
      for (HRegion region : regions) {
        region.compact(true);
      }

      regions.stream()
        .filter(region -> scanner.containsKey(region.getRegionInfo().getEncodedName()))
        .forEach(r -> assertFalse("Contains an open file reference which can be split",
          r.getStores().get(0).canSplit()));
    } finally {
      scanner.values().forEach(s -> {
        try {
          s.close(true);
        } catch (IOException ioe) {
          LOG.error("Failed while closing store file", ioe);
        }
      });
      scanner.clear();
      Closeables.close(source, true);
      if (!compactSplit.isCompactionsEnabled()) {
        compactSplit.setCompactionsEnabled(true);
      }
      TEST_UTIL.deleteTableIfAny(tableName);
    }
  }
  /**
   * Tests that the client sees meta table changes as atomic during splits
   */
  @Test
  public void testFromClientSideWhileSplitting() throws Throwable {
    LOG.info("Starting testFromClientSideWhileSplitting");
    final TableName tableName = TableName.valueOf(name.getMethodName());
    final byte[] FAMILY = Bytes.toBytes("family");

    // SplitTransaction will update the meta table by offlining the parent region, and adding info
    // for daughters.
    Table table = TEST_UTIL.createTable(tableName, FAMILY);

    Stoppable stopper = new StoppableImplementation();
    RegionSplitter regionSplitter = new RegionSplitter(table);
    RegionChecker regionChecker = new RegionChecker(CONF, stopper, tableName);
    final ChoreService choreService = new ChoreService("TEST_SERVER");
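
    // The checker chore verifies the region boundaries in hbase:meta every 100 ms while the
    // splitter thread keeps splitting regions, so meta consistency is checked concurrently
    // with the splits.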
    choreService.scheduleChore(regionChecker);
    regionSplitter.start();

    // wait until the splitter is finished
    regionSplitter.join();
    stopper.stop(null);

    if (regionChecker.ex != null) {
      throw new AssertionError("regionChecker", regionChecker.ex);
    }

    if (regionSplitter.ex != null) {
      throw new AssertionError("regionSplitter", regionSplitter.ex);
    }

    // one final check
    regionChecker.verify();
  }
  static class RegionSplitter extends Thread {
    final Connection connection;
    Throwable ex;
    Table table;
    TableName tableName;
    byte[] family;
    Admin admin;
    HRegionServer rs;

    RegionSplitter(Table table) throws IOException {
      this.table = table;
      this.tableName = table.getName();
      this.family = table.getDescriptor().getColumnFamilies()[0].getName();
      admin = TEST_UTIL.getAdmin();
      rs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0);
      connection = TEST_UTIL.getConnection();
    }

    @Override
    public void run() {
      try {
        Random random = new Random();
        for (int i = 0; i < 5; i++) {
          List<RegionInfo> regions = MetaTableAccessor.getTableRegions(connection, tableName, true);
          if (regions.isEmpty()) {
            continue;
          }
          int regionIndex = random.nextInt(regions.size());

          // pick a random region and split it into two
          RegionInfo region = Iterators.get(regions.iterator(), regionIndex);

          // pick the mid split point
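          // (row keys are written as 4-byte ints by addData(), so the region start/end keys can
          // be decoded back to ints)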
          int start = 0, end = Integer.MAX_VALUE;
          if (region.getStartKey().length > 0) {
            start = Bytes.toInt(region.getStartKey());
          }
          if (region.getEndKey().length > 0) {
            end = Bytes.toInt(region.getEndKey());
          }
          int mid = start + ((end - start) / 2);
          byte[] splitPoint = Bytes.toBytes(mid);

          // put some rows to the regions
          addData(start);
          addData(mid);

          flushAndBlockUntilDone(admin, rs, region.getRegionName());
          compactAndBlockUntilDone(admin, rs, region.getRegionName());

          log("Initiating region split for:" + region.getRegionNameAsString());
          try {
            admin.splitRegionAsync(region.getRegionName(), splitPoint).get();
            // wait until the split is complete
            blockUntilRegionSplit(CONF, 50000, region.getRegionName(), true);
          } catch (NotServingRegionException ex) {
            // ignore
          }
        }
      } catch (Throwable ex) {
        this.ex = ex;
      }
    }

    void addData(int start) throws IOException {
      List<Put> puts = new ArrayList<>();
      for (int i = start; i < start + 100; i++) {
        Put put = new Put(Bytes.toBytes(i));
        put.addColumn(family, family, Bytes.toBytes(i));
        puts.add(put);
      }
      table.put(puts);
    }
  }
  /**
   * Checks regions using MetaTableAccessor and HTable methods
   */
  static class RegionChecker extends ScheduledChore {
    Connection connection;
    Configuration conf;
    TableName tableName;
    Throwable ex;

    RegionChecker(Configuration conf, Stoppable stopper, TableName tableName) throws IOException {
      super("RegionChecker", stopper, 100);
      this.conf = conf;
      this.tableName = tableName;

      this.connection = ConnectionFactory.createConnection(conf);
    }

    /** verify region boundaries obtained from MetaScanner */
    void verifyRegionsUsingMetaTableAccessor() throws Exception {
      List<RegionInfo> regionList = MetaTableAccessor.getTableRegions(connection, tableName, true);
      verifyTableRegions(regionList.stream()
        .collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
      regionList = MetaTableAccessor.getAllRegions(connection, true);
      verifyTableRegions(regionList.stream()
        .collect(Collectors.toCollection(() -> new TreeSet<>(RegionInfo.COMPARATOR))));
    }

    /** verify region boundaries obtained from HTable.getStartEndKeys() */
    void verifyRegionsUsingHTable() throws IOException {
      try (RegionLocator rl = connection.getRegionLocator(tableName)) {
        Pair<byte[][], byte[][]> keys = rl.getStartEndKeys();
        verifyStartEndKeys(keys);

        Set<RegionInfo> regions = new TreeSet<>(RegionInfo.COMPARATOR);
        for (HRegionLocation loc : rl.getAllRegionLocations()) {
          regions.add(loc.getRegion());
        }
        verifyTableRegions(regions);
      }
    }

    void verify() throws Exception {
      verifyRegionsUsingMetaTableAccessor();
      verifyRegionsUsingHTable();
    }

    void verifyTableRegions(Set<RegionInfo> regions) {
      log("Verifying " + regions.size() + " regions: " + regions);

      byte[][] startKeys = new byte[regions.size()][];
      byte[][] endKeys = new byte[regions.size()][];

      int i = 0;
      for (RegionInfo region : regions) {
        startKeys[i] = region.getStartKey();
        endKeys[i] = region.getEndKey();
        i++;
      }

      Pair<byte[][], byte[][]> keys = new Pair<>(startKeys, endKeys);
      verifyStartEndKeys(keys);
    }
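
    /**
     * Asserts that the regions tile the whole key space: the first start key and the last end key
     * are empty, and every region's start key equals the previous region's end key.
     */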
    void verifyStartEndKeys(Pair<byte[][], byte[][]> keys) {
      byte[][] startKeys = keys.getFirst();
      byte[][] endKeys = keys.getSecond();
      assertEquals(startKeys.length, endKeys.length);
      assertTrue("Found 0 regions for the table", startKeys.length > 0);

      assertArrayEquals("Start key for the first region is not byte[0]", HConstants.EMPTY_START_ROW,
        startKeys[0]);
      byte[] prevEndKey = HConstants.EMPTY_START_ROW;

      // ensure that we do not have any gaps
      for (int i = 0; i < startKeys.length; i++) {
        assertArrayEquals(
          "Hole in hbase:meta is detected. prevEndKey=" + Bytes.toStringBinary(prevEndKey) +
            ", regionStartKey=" + Bytes.toStringBinary(startKeys[i]),
          prevEndKey, startKeys[i]);
        prevEndKey = endKeys[i];
      }
      assertArrayEquals("End key for the last region is not byte[0]", HConstants.EMPTY_END_ROW,
        endKeys[endKeys.length - 1]);
    }

    @Override
    protected void chore() {
      try {
        verify();
      } catch (Throwable ex) {
        this.ex = ex;
        getStopper().stop("caught exception");
      }
    }
  }
  public static void log(String msg) {
    LOG.info(msg);
  }

  /* some utility methods for split tests */
  public static void flushAndBlockUntilDone(Admin admin, HRegionServer rs, byte[] regionName)
    throws IOException, InterruptedException {
    log("flushing region: " + Bytes.toStringBinary(regionName));
    admin.flushRegion(regionName);
    log("blocking until flush is complete: " + Bytes.toStringBinary(regionName));
    Threads.sleepWithoutInterrupt(500);
    while (rs.getOnlineRegion(regionName).getMemStoreDataSize() > 0) {
      Threads.sleep(50);
    }
  }
  public static void compactAndBlockUntilDone(Admin admin, HRegionServer rs, byte[] regionName)
    throws IOException, InterruptedException {
    log("Compacting region: " + Bytes.toStringBinary(regionName));
    // Wait till it's online before we compact, else it comes back with NoServerForRegionException
    try {
      TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return rs.getServerName().equals(
            MetaTableAccessor.getRegionLocation(admin.getConnection(), regionName).getServerName());
        }
      });
    } catch (Exception e) {
      throw new IOException(e);
    }
    admin.majorCompactRegion(regionName);
    log("blocking until compaction is complete: " + Bytes.toStringBinary(regionName));
    Threads.sleepWithoutInterrupt(500);
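    // A major compaction leaves at most one store file per store, so keep polling until no store
    // of this region has more than one file.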
    outer: for (;;) {
      for (Store store : rs.getOnlineRegion(regionName).getStores()) {
        if (store.getStorefilesCount() > 1) {
          Threads.sleep(50);
          continue outer;
        }
      }
      break;
    }
  }
  /**
   * Blocks until the region split is complete in hbase:meta and region server opens the daughters
   */
  public static void blockUntilRegionSplit(Configuration conf, long timeout,
    final byte[] regionName, boolean waitForDaughters) throws IOException, InterruptedException {
    long start = EnvironmentEdgeManager.currentTime();
    log("blocking until region is split:" + Bytes.toStringBinary(regionName));
    RegionInfo daughterA = null, daughterB = null;

    try (Connection conn = ConnectionFactory.createConnection(conf);
      Table metaTable = conn.getTable(TableName.META_TABLE_NAME)) {
      Result result = null;
      RegionInfo region = null;
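      // Poll the parent's row in hbase:meta until it is marked as a split parent, then read the
      // two daughter regions from that same row.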
      while ((EnvironmentEdgeManager.currentTime() - start) < timeout) {
        result = metaTable.get(new Get(regionName));
        if (result == null) {
          break;
        }

        region = CatalogFamilyFormat.getRegionInfo(result);
        if (region.isSplitParent()) {
          log("found parent region: " + region.toString());
          PairOfSameType<RegionInfo> pair = MetaTableAccessor.getDaughterRegions(result);
          daughterA = pair.getFirst();
          daughterB = pair.getSecond();
          break;
        }
        Threads.sleep(100);
      }
      if (daughterA == null || daughterB == null) {
        throw new IOException("Failed to get daughters, daughterA=" + daughterA + ", daughterB=" +
          daughterB + ", timeout=" + timeout + ", result=" + result + ", regionName=" +
          Bytes.toString(regionName) + ", region=" + region);
      }

      // if we are here, this means the region split is complete or timed out
      if (waitForDaughters) {
        long rem = timeout - (EnvironmentEdgeManager.currentTime() - start);
        blockUntilRegionIsInMeta(conn, rem, daughterA);

        rem = timeout - (EnvironmentEdgeManager.currentTime() - start);
        blockUntilRegionIsInMeta(conn, rem, daughterB);

        rem = timeout - (EnvironmentEdgeManager.currentTime() - start);
        blockUntilRegionIsOpened(conf, rem, daughterA);

        rem = timeout - (EnvironmentEdgeManager.currentTime() - start);
        blockUntilRegionIsOpened(conf, rem, daughterB);

        // Compacting the new region to make sure references can be cleaned up
        compactAndBlockUntilDone(TEST_UTIL.getAdmin(),
          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterA.getRegionName());
        compactAndBlockUntilDone(TEST_UTIL.getAdmin(),
          TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterB.getRegionName());

        removeCompactedFiles(conn, timeout, daughterA);
        removeCompactedFiles(conn, timeout, daughterB);
      }
    }
  }
  public static void removeCompactedFiles(Connection conn, long timeout, RegionInfo hri)
    throws IOException, InterruptedException {
    log("remove compacted files for : " + hri.getRegionNameAsString());
    List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(hri.getTable());
    regions.stream().forEach(r -> {
      try {
        r.getStores().get(0).closeAndArchiveCompactedFiles();
      } catch (IOException ioe) {
        LOG.error("failed in removing compacted file", ioe);
      }
    });
  }
  public static void blockUntilRegionIsInMeta(Connection conn, long timeout, RegionInfo hri)
    throws IOException, InterruptedException {
    log("blocking until region is in META: " + hri.getRegionNameAsString());
    long start = EnvironmentEdgeManager.currentTime();
    while (EnvironmentEdgeManager.currentTime() - start < timeout) {
      HRegionLocation loc = MetaTableAccessor.getRegionLocation(conn, hri);
      if (loc != null && !loc.getRegion().isOffline()) {
        log("found region in META: " + hri.getRegionNameAsString());
        break;
      }
      Threads.sleep(100);
    }
  }
  public static void blockUntilRegionIsOpened(Configuration conf, long timeout, RegionInfo hri)
    throws IOException, InterruptedException {
    log("blocking until region is opened for reading:" + hri.getRegionNameAsString());
    long start = EnvironmentEdgeManager.currentTime();
    try (Connection conn = ConnectionFactory.createConnection(conf);
      Table table = conn.getTable(hri.getTable())) {
      byte[] row = hri.getStartKey();
      // Check for null/empty row. If we find one, use a key that is likely to be in first region.
      if (row == null || row.length <= 0) {
        row = new byte[] { '0' };
      }
      Get get = new Get(row);
      while (EnvironmentEdgeManager.currentTime() - start < timeout) {
        try {
          table.get(get);
          break;
        } catch (IOException ex) {
          // wait some more
        }
        Threads.sleep(100);
      }
    }
  }
}