2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
.master
;
20 import static org
.junit
.Assert
.assertFalse
;
21 import static org
.junit
.Assert
.assertNotNull
;
22 import static org
.junit
.Assert
.assertNull
;
23 import static org
.junit
.Assert
.assertTrue
;
25 import java
.io
.IOException
;
26 import java
.util
.List
;
27 import java
.util
.Optional
;
28 import java
.util
.concurrent
.CountDownLatch
;
29 import java
.util
.stream
.Collectors
;
31 import org
.apache
.hadoop
.conf
.Configuration
;
32 import org
.apache
.hadoop
.hbase
.HBaseClassTestRule
;
33 import org
.apache
.hadoop
.hbase
.ServerName
;
34 import org
.apache
.hadoop
.hbase
.StartMiniClusterOption
;
35 import org
.apache
.hadoop
.hbase
.TableName
;
36 import org
.apache
.hadoop
.hbase
.client
.RegionInfo
;
37 import org
.apache
.hadoop
.hbase
.client
.Table
;
38 import org
.apache
.hadoop
.hbase
.master
.assignment
.AssignmentManager
;
39 import org
.apache
.hadoop
.hbase
.master
.assignment
.ServerState
;
40 import org
.apache
.hadoop
.hbase
.master
.assignment
.ServerStateNode
;
41 import org
.apache
.hadoop
.hbase
.master
.procedure
.ServerCrashProcedure
;
42 import org
.apache
.hadoop
.hbase
.procedure2
.Procedure
;
43 import org
.apache
.hadoop
.hbase
.regionserver
.HRegionServer
;
44 import org
.apache
.hadoop
.hbase
.testclassification
.LargeTests
;
45 import org
.apache
.hadoop
.hbase
.testclassification
.MasterTests
;
46 import org
.apache
.hadoop
.hbase
.util
.JVMClusterUtil
;
47 import org
.apache
.zookeeper
.KeeperException
;
48 import org
.junit
.ClassRule
;
49 import org
.junit
.Test
;
50 import org
.junit
.experimental
.categories
.Category
;
51 import org
.slf4j
.Logger
;
52 import org
.slf4j
.LoggerFactory
;
54 @Category({ MasterTests
.class, LargeTests
.class })
55 public class TestClusterRestartFailover
extends AbstractTestRestartCluster
{
58 public static final HBaseClassTestRule CLASS_RULE
=
59 HBaseClassTestRule
.forClass(TestClusterRestartFailover
.class);
61 private static final Logger LOG
= LoggerFactory
.getLogger(TestClusterRestartFailover
.class);
63 private volatile static CountDownLatch SCP_LATCH
;
64 private static ServerName SERVER_FOR_TEST
;
67 protected boolean splitWALCoordinatedByZk() {
71 private ServerStateNode
getServerStateNode(ServerName serverName
) {
72 return UTIL
.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
73 .getServerNode(serverName
);
77 * Test for HBASE-22964
80 public void test() throws Exception
{
84 // Find server that does not have hbase:namespace on it. This tests holds up SCPs. If it
85 // holds up the server w/ hbase:namespace, the Master initialization will be held up
86 // because this table is not online and test fails.
87 for (JVMClusterUtil
.RegionServerThread rst
:
88 UTIL
.getHBaseCluster().getLiveRegionServerThreads()) {
89 HRegionServer rs
= rst
.getRegionServer();
90 if (rs
.getRegions(TableName
.NAMESPACE_TABLE_NAME
).isEmpty()) {
91 SERVER_FOR_TEST
= rs
.getServerName();
94 UTIL
.waitFor(60000, () -> getServerStateNode(SERVER_FOR_TEST
) != null);
95 ServerStateNode serverNode
= getServerStateNode(SERVER_FOR_TEST
);
96 assertNotNull(serverNode
);
97 assertTrue("serverNode should be ONLINE when cluster runs normally",
98 serverNode
.isInState(ServerState
.ONLINE
));
100 SCP_LATCH
= new CountDownLatch(1);
102 // Shutdown cluster and restart
103 List
<Integer
> ports
=
104 UTIL
.getHBaseCluster().getMaster().getServerManager().getOnlineServersList().stream()
105 .map(serverName
-> serverName
.getPort()).collect(Collectors
.toList());
106 LOG
.info("Shutting down cluster");
107 UTIL
.getHBaseCluster().killAll();
108 UTIL
.getHBaseCluster().waitUntilShutDown();
109 LOG
.info("Restarting cluster");
110 UTIL
.restartHBaseCluster(StartMiniClusterOption
.builder().masterClass(HMasterForTest
.class)
111 .numMasters(1).numRegionServers(3).rsPorts(ports
).build());
112 LOG
.info("Started cluster");
113 UTIL
.waitFor(60000, () -> UTIL
.getHBaseCluster().getMaster().isInitialized());
114 LOG
.info("Started cluster master, waiting for {}", SERVER_FOR_TEST
);
115 UTIL
.waitFor(60000, () -> getServerStateNode(SERVER_FOR_TEST
) != null);
116 serverNode
= getServerStateNode(SERVER_FOR_TEST
);
117 assertFalse("serverNode should not be ONLINE during SCP processing",
118 serverNode
.isInState(ServerState
.ONLINE
));
119 Optional
<Procedure
<?
>> procedure
= UTIL
.getHBaseCluster().getMaster().getProcedures().stream()
120 .filter(p
-> (p
instanceof ServerCrashProcedure
) &&
121 ((ServerCrashProcedure
) p
).getServerName().equals(SERVER_FOR_TEST
)).findAny();
122 assertTrue("Should have one SCP for " + SERVER_FOR_TEST
, procedure
.isPresent());
123 assertTrue("Submit the SCP for the same serverName " + SERVER_FOR_TEST
+ " which should fail",
124 UTIL
.getHBaseCluster().getMaster().getServerManager().expireServer(SERVER_FOR_TEST
) ==
125 Procedure
.NO_PROC_ID
);
127 // Wait the SCP to finish
128 LOG
.info("Waiting on latch");
129 SCP_LATCH
.countDown();
130 UTIL
.waitFor(60000, () -> procedure
.get().isFinished());
132 assertFalse("Even when the SCP is finished, the duplicate SCP should not be scheduled for " +
134 UTIL
.getHBaseCluster().getMaster().getServerManager().expireServer(SERVER_FOR_TEST
) ==
135 Procedure
.NO_PROC_ID
);
136 serverNode
= UTIL
.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
137 .getServerNode(SERVER_FOR_TEST
);
138 assertNull("serverNode should be deleted after SCP finished", serverNode
);
141 private void setupCluster() throws Exception
{
142 LOG
.info("Setup cluster");
143 UTIL
.startMiniCluster(
144 StartMiniClusterOption
.builder().masterClass(HMasterForTest
.class).numMasters(1)
145 .numRegionServers(3).build());
146 LOG
.info("Cluster is up");
147 UTIL
.waitFor(60000, () -> UTIL
.getMiniHBaseCluster().getMaster().isInitialized());
148 LOG
.info("Master is up");
149 // wait for all SCPs finished
150 UTIL
.waitFor(60000, () -> UTIL
.getHBaseCluster().getMaster().getProcedures().stream()
151 .noneMatch(p
-> p
instanceof ServerCrashProcedure
));
155 private void setupTable() throws Exception
{
156 TableName tableName
= TABLES
[0];
157 UTIL
.createMultiRegionTable(tableName
, FAMILY
);
158 UTIL
.waitTableAvailable(tableName
);
159 Table table
= UTIL
.getConnection().getTable(tableName
);
160 for (int i
= 0; i
< 100; i
++) {
161 UTIL
.loadTable(table
, FAMILY
);
/**
 * HMaster subclass used when this test starts and restarts the cluster (it is passed as
 * {@code masterClass} to the cluster options). Its only visible customization is that
 * {@code createAssignmentManager} returns an {@code AssignmentManagerForTest}.
 */
165 public static final class HMasterForTest
extends HMaster
{
// Takes the master Configuration; declared to throw IOException.
167 public HMasterForTest(Configuration conf
) throws IOException
{
// Supplies the test assignment manager, whose getRegionsOnServer checks SCP_LATCH and
// SERVER_FOR_TEST before letting the caller continue.
172 protected AssignmentManager
createAssignmentManager(MasterServices master
) {
173 return new AssignmentManagerForTest(master
);
177 private static final class AssignmentManagerForTest
extends AssignmentManager
{
179 public AssignmentManagerForTest(MasterServices master
) {
184 public List
<RegionInfo
> getRegionsOnServer(ServerName serverName
) {
185 List
<RegionInfo
> regions
= super.getRegionsOnServer(serverName
);
186 // ServerCrashProcedure will call this method, so wait the CountDownLatch here
187 if (SCP_LATCH
!= null && SERVER_FOR_TEST
!= null && serverName
.equals(SERVER_FOR_TEST
)) {
189 LOG
.info("ServerCrashProcedure wait the CountDownLatch here");
191 LOG
.info("Continue the ServerCrashProcedure");
193 } catch (InterruptedException e
) {
194 throw new RuntimeException(e
);