HBASE-23723 Ensure MOB compaction works in optimized mode after snapshot clone (...
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / replication / TestReplicationKillRS.java
blobc2457267b280e42eb7474e1c0be21a93923c1d8e
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org.apache.hadoop.hbase.replication;
20 import static org.junit.Assert.fail;
22 import org.apache.hadoop.hbase.HBaseClassTestRule;
23 import org.apache.hadoop.hbase.HBaseTestingUtility;
24 import org.apache.hadoop.hbase.UnknownScannerException;
25 import org.apache.hadoop.hbase.client.Connection;
26 import org.apache.hadoop.hbase.client.ConnectionFactory;
27 import org.apache.hadoop.hbase.client.Result;
28 import org.apache.hadoop.hbase.client.ResultScanner;
29 import org.apache.hadoop.hbase.client.Scan;
30 import org.apache.hadoop.hbase.client.Table;
31 import org.apache.hadoop.hbase.testclassification.LargeTests;
32 import org.apache.hadoop.hbase.testclassification.ReplicationTests;
33 import org.junit.ClassRule;
34 import org.junit.experimental.categories.Category;
35 import org.slf4j.Logger;
36 import org.slf4j.LoggerFactory;
38 @Category({ ReplicationTests.class, LargeTests.class })
39 public class TestReplicationKillRS extends TestReplicationBase {
41 @ClassRule
42 public static final HBaseClassTestRule CLASS_RULE =
43 HBaseClassTestRule.forClass(TestReplicationKillRS.class);
45 private static final Logger LOG = LoggerFactory.getLogger(TestReplicationKillRS.class);
47 /**
48 * Load up 1 tables over 2 region servers and kill a source during the upload. The failover
49 * happens internally. WARNING this test sometimes fails because of HBASE-3515
51 public void loadTableAndKillRS(HBaseTestingUtility util) throws Exception {
52 // killing the RS with hbase:meta can result into failed puts until we solve
53 // IO fencing
54 int rsToKill1 = util.getHBaseCluster().getServerWithMeta() == 0 ? 1 : 0;
56 // Takes about 20 secs to run the full loading, kill around the middle
57 Thread killer = killARegionServer(util, 5000, rsToKill1);
58 Result[] res;
59 int initialCount;
60 try (Connection conn = ConnectionFactory.createConnection(CONF1)) {
61 try (Table table = conn.getTable(tableName)) {
62 LOG.info("Start loading table");
63 initialCount = UTIL1.loadTable(table, famName);
64 LOG.info("Done loading table");
65 killer.join(5000);
66 LOG.info("Done waiting for threads");
68 while (true) {
69 try (ResultScanner scanner = table.getScanner(new Scan())) {
70 res = scanner.next(initialCount);
71 break;
72 } catch (UnknownScannerException ex) {
73 LOG.info("Cluster wasn't ready yet, restarting scanner");
78 // Test we actually have all the rows, we may miss some because we
79 // don't have IO fencing.
80 if (res.length != initialCount) {
81 LOG.warn("We lost some rows on the master cluster!");
82 // We don't really expect the other cluster to have more rows
83 initialCount = res.length;
86 int lastCount = 0;
87 final long start = System.currentTimeMillis();
88 int i = 0;
89 try (Connection conn = ConnectionFactory.createConnection(CONF2)) {
90 try (Table table = conn.getTable(tableName)) {
91 while (true) {
92 if (i == NB_RETRIES - 1) {
93 fail("Waited too much time for queueFailover replication. " + "Waited "
94 + (System.currentTimeMillis() - start) + "ms.");
96 Result[] res2;
97 try (ResultScanner scanner = table.getScanner(new Scan())) {
98 res2 = scanner.next(initialCount * 2);
100 if (res2.length < initialCount) {
101 if (lastCount < res2.length) {
102 i--; // Don't increment timeout if we make progress
103 } else {
104 i++;
106 lastCount = res2.length;
107 LOG.info(
108 "Only got " + lastCount + " rows instead of " + initialCount + " current i=" + i);
109 Thread.sleep(SLEEP_TIME * 2);
110 } else {
111 break;
118 private static Thread killARegionServer(final HBaseTestingUtility utility, final long timeout,
119 final int rs) {
120 Thread killer = new Thread() {
121 @Override
122 public void run() {
123 try {
124 Thread.sleep(timeout);
125 utility.getHBaseCluster().getRegionServer(rs).stop("Stopping as part of the test");
126 } catch (Exception e) {
127 LOG.error("Couldn't kill a region server", e);
131 killer.setDaemon(true);
132 killer.start();
133 return killer;