HBASE-26567 Remove IndexType from ChunkCreator (#3947)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / regionserver / TestRSKilledWhenInitializing.java
blob7cffc39c165a8129a08b2819237e338502d46731
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase.regionserver;
20 import static org.junit.Assert.assertEquals;
21 import static org.junit.Assert.assertTrue;
23 import java.io.IOException;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.concurrent.atomic.AtomicBoolean;
27 import java.util.concurrent.atomic.AtomicReference;
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.hbase.HBaseClassTestRule;
30 import org.apache.hadoop.hbase.HBaseConfiguration;
31 import org.apache.hadoop.hbase.HBaseTestingUtil;
32 import org.apache.hadoop.hbase.LocalHBaseCluster;
33 import org.apache.hadoop.hbase.ServerName;
34 import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
35 import org.apache.hadoop.hbase.client.RegionInfo;
36 import org.apache.hadoop.hbase.master.HMaster;
37 import org.apache.hadoop.hbase.master.ServerListener;
38 import org.apache.hadoop.hbase.master.ServerManager;
39 import org.apache.hadoop.hbase.testclassification.MediumTests;
40 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
41 import org.apache.hadoop.hbase.util.Bytes;
42 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
43 import org.apache.hadoop.hbase.util.Threads;
44 import org.junit.ClassRule;
45 import org.junit.Ignore;
46 import org.junit.Rule;
47 import org.junit.Test;
48 import org.junit.experimental.categories.Category;
49 import org.junit.rules.TestName;
50 import org.slf4j.Logger;
51 import org.slf4j.LoggerFactory;
53 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse;
55 /**
56 * Tests that a regionserver that dies after reporting for duty gets removed
57 * from the master's list of online regionservers. See HBASE-9593.
59 @Category({RegionServerTests.class, MediumTests.class})
60 @Ignore("See HBASE-19515")
61 public class TestRSKilledWhenInitializing {
63 @ClassRule
64 public static final HBaseClassTestRule CLASS_RULE =
65 HBaseClassTestRule.forClass(TestRSKilledWhenInitializing.class);
67 private static final Logger LOG = LoggerFactory.getLogger(TestRSKilledWhenInitializing.class);
69 @Rule
70 public TestName testName = new TestName();
72 // This boolean needs to be globally available. It is used below in our
73 // mocked up regionserver so it knows when to die.
74 private static AtomicBoolean masterActive = new AtomicBoolean(false);
75 // Ditto for this variable. It also is used in the mocked regionserver class.
76 private static final AtomicReference<ServerName> killedRS = new AtomicReference<ServerName>();
78 private static final int NUM_MASTERS = 1;
79 private static final int NUM_RS = 2;
81 /**
82 * Test verifies whether a region server is removed from online servers list in master if it went
83 * down after registering with master. Test will TIMEOUT if an error!!!!
84 * @throws Exception
86 @Test
87 public void testRSTerminationAfterRegisteringToMasterBeforeCreatingEphemeralNode()
88 throws Exception {
89 // Create config to use for this cluster
90 Configuration conf = HBaseConfiguration.create();
91 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
92 // Start the cluster
93 final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(conf);
94 TEST_UTIL.startMiniDFSCluster(3);
95 TEST_UTIL.startMiniZKCluster();
96 TEST_UTIL.createRootDir();
97 final LocalHBaseCluster cluster = new LocalHBaseCluster(conf, NUM_MASTERS, NUM_RS,
98 HMaster.class, RegisterAndDieRegionServer.class);
99 final MasterThread master = startMaster(cluster.getMasters().get(0));
100 try {
101 // Master is up waiting on RegionServers to check in. Now start RegionServers.
102 for (int i = 0; i < NUM_RS; i++) {
103 cluster.getRegionServers().get(i).start();
105 // Expected total regionservers depends on whether Master can host regions or not.
106 int expectedTotalRegionServers = NUM_RS;
107 List<ServerName> onlineServersList = null;
108 do {
109 onlineServersList = master.getMaster().getServerManager().getOnlineServersList();
110 } while (onlineServersList.size() < expectedTotalRegionServers);
111 // Wait until killedRS is set. Means RegionServer is starting to go down.
112 while (killedRS.get() == null) {
113 Threads.sleep(1);
115 // Wait on the RegionServer to fully die.
116 while (cluster.getLiveRegionServers().size() >= expectedTotalRegionServers) {
117 Threads.sleep(1);
119 // Make sure Master is fully up before progressing. Could take a while if regions
120 // being reassigned.
121 while (!master.getMaster().isInitialized()) {
122 Threads.sleep(1);
125 // Now in steady state. How many regions open? Master should have too many regionservers
126 // showing still. The downed RegionServer should still be showing as registered.
127 assertTrue(master.getMaster().getServerManager().isServerOnline(killedRS.get()));
128 // Find non-meta region (namespace?) and assign to the killed server. That'll trigger cleanup.
129 Map<RegionInfo, ServerName> assignments = null;
130 do {
131 assignments = master.getMaster().getAssignmentManager().getRegionStates().getRegionAssignments();
132 } while (assignments == null || assignments.size() < 2);
133 RegionInfo hri = null;
134 for (Map.Entry<RegionInfo, ServerName> e: assignments.entrySet()) {
135 if (e.getKey().isMetaRegion()) continue;
136 hri = e.getKey();
137 break;
139 // Try moving region to the killed server. It will fail. As by-product, we will
140 // remove the RS from Master online list because no corresponding znode.
141 assertEquals(expectedTotalRegionServers,
142 master.getMaster().getServerManager().getOnlineServersList().size());
143 LOG.info("Move " + hri.getEncodedName() + " to " + killedRS.get());
144 master.getMaster().move(hri.getEncodedNameAsBytes(),
145 Bytes.toBytes(killedRS.get().toString()));
147 // TODO: This test could do more to verify fix. It could create a table
148 // and do round-robin assign. It should fail if zombie RS. HBASE-19515.
150 // Wait until the RS no longer shows as registered in Master.
151 while (onlineServersList.size() > (NUM_RS + 1)) {
152 Thread.sleep(100);
153 onlineServersList = master.getMaster().getServerManager().getOnlineServersList();
155 } finally {
156 // Shutdown is messy with complaints about fs being closed. Why? TODO.
157 cluster.shutdown();
158 cluster.join();
159 TEST_UTIL.shutdownMiniDFSCluster();
160 TEST_UTIL.shutdownMiniZKCluster();
161 TEST_UTIL.cleanupTestDir();
166 * Start Master. Get as far as the state where Master is waiting on
167 * RegionServers to check in, then return.
169 private MasterThread startMaster(MasterThread master) {
170 master.start();
171 // It takes a while until ServerManager creation to happen inside Master startup.
172 while (master.getMaster().getServerManager() == null) {
173 continue;
175 // Set a listener for the waiting-on-RegionServers state. We want to wait
176 // until this condition before we leave this method and start regionservers.
177 final AtomicBoolean waiting = new AtomicBoolean(false);
178 if (master.getMaster().getServerManager() == null) throw new NullPointerException("SM");
179 master.getMaster().getServerManager().registerListener(new ServerListener() {
180 @Override
181 public void waiting() {
182 waiting.set(true);
185 // Wait until the Master gets to place where it is waiting on RegionServers to check in.
186 while (!waiting.get()) {
187 continue;
189 // Set the global master-is-active; gets picked up by regionservers later.
190 masterActive.set(true);
191 return master;
195 * A RegionServer that reports for duty and then immediately dies if it is the first to receive
196 * the response to a reportForDuty. When it dies, it clears its ephemeral znode which the master
197 * notices and so removes the regionserver from its set of online regionservers.
199 static class RegisterAndDieRegionServer
200 extends SingleProcessHBaseCluster.MiniHBaseClusterRegionServer {
201 public RegisterAndDieRegionServer(Configuration conf) throws IOException, InterruptedException {
202 super(conf);
205 @Override
206 protected void handleReportForDutyResponse(RegionServerStartupResponse c)
207 throws IOException {
208 if (killedRS.compareAndSet(null, getServerName())) {
209 // Make sure Master is up so it will see the removal of the ephemeral znode for this RS.
210 while (!masterActive.get()) {
211 Threads.sleep(100);
213 super.kill();
214 } else {
215 super.handleReportForDutyResponse(c);