2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
.master
;
20 import static org
.junit
.Assert
.assertEquals
;
21 import static org
.junit
.Assert
.assertNotNull
;
22 import static org
.junit
.Assert
.assertTrue
;
23 import java
.util
.List
;
24 import java
.util
.concurrent
.TimeUnit
;
25 import org
.apache
.hadoop
.hbase
.ClusterMetrics
;
26 import org
.apache
.hadoop
.hbase
.HBaseClassTestRule
;
27 import org
.apache
.hadoop
.hbase
.HBaseTestingUtil
;
28 import org
.apache
.hadoop
.hbase
.ServerName
;
29 import org
.apache
.hadoop
.hbase
.SingleProcessHBaseCluster
;
30 import org
.apache
.hadoop
.hbase
.StartTestingClusterOption
;
31 import org
.apache
.hadoop
.hbase
.master
.RegionState
.State
;
32 import org
.apache
.hadoop
.hbase
.regionserver
.HRegionServer
;
33 import org
.apache
.hadoop
.hbase
.testclassification
.FlakeyTests
;
34 import org
.apache
.hadoop
.hbase
.testclassification
.LargeTests
;
35 import org
.apache
.hadoop
.hbase
.util
.JVMClusterUtil
.MasterThread
;
36 import org
.apache
.hadoop
.hbase
.zookeeper
.MetaTableLocator
;
37 import org
.junit
.ClassRule
;
38 import org
.junit
.Rule
;
39 import org
.junit
.Test
;
40 import org
.junit
.experimental
.categories
.Category
;
41 import org
.junit
.rules
.TestName
;
42 import org
.slf4j
.Logger
;
43 import org
.slf4j
.LoggerFactory
;
45 @Category({FlakeyTests
.class, LargeTests
.class})
46 public class TestMasterFailover
{
49 public static final HBaseClassTestRule CLASS_RULE
=
50 HBaseClassTestRule
.forClass(TestMasterFailover
.class);
52 private static final Logger LOG
= LoggerFactory
.getLogger(TestMasterFailover
.class);
53 @Rule public TestName name
= new TestName();
56 * Simple test of master failover.
58 * Starts with three masters. Kills a backup master. Then kills the active
59 * master. Ensures the final master becomes active and we can still contact
63 public void testSimpleMasterFailover() throws Exception
{
64 final int NUM_MASTERS
= 3;
68 HBaseTestingUtil TEST_UTIL
= new HBaseTestingUtil();
70 StartTestingClusterOption option
= StartTestingClusterOption
.builder()
71 .numMasters(NUM_MASTERS
).numRegionServers(NUM_RS
).numDataNodes(NUM_RS
).build();
72 TEST_UTIL
.startMiniCluster(option
);
73 SingleProcessHBaseCluster cluster
= TEST_UTIL
.getHBaseCluster();
75 // get all the master threads
76 List
<MasterThread
> masterThreads
= cluster
.getMasterThreads();
78 // wait for each to come online
79 for (MasterThread mt
: masterThreads
) {
80 assertTrue(mt
.isAlive());
83 // verify only one is the active master and we have right number
86 ServerName activeName
= null;
87 HMaster active
= null;
88 for (int i
= 0; i
< masterThreads
.size(); i
++) {
89 if (masterThreads
.get(i
).getMaster().isActiveMaster()) {
92 active
= masterThreads
.get(activeIndex
).getMaster();
93 activeName
= active
.getServerName();
96 assertEquals(1, numActive
);
97 assertEquals(NUM_MASTERS
, masterThreads
.size());
98 LOG
.info("Active master " + activeName
);
100 // Check that ClusterStatus reports the correct active and backup masters
101 assertNotNull(active
);
102 ClusterMetrics status
= active
.getClusterMetrics();
103 assertEquals(activeName
, status
.getMasterName());
104 assertEquals(2, status
.getBackupMasterNames().size());
106 // attempt to stop one of the inactive masters
107 int backupIndex
= (activeIndex
== 0 ?
1 : activeIndex
- 1);
108 HMaster master
= cluster
.getMaster(backupIndex
);
109 LOG
.debug("\n\nStopping a backup master: " + master
.getServerName() + "\n");
110 cluster
.stopMaster(backupIndex
, false);
111 cluster
.waitOnMaster(backupIndex
);
113 // Verify still one active master and it's the same
114 for (int i
= 0; i
< masterThreads
.size(); i
++) {
115 if (masterThreads
.get(i
).getMaster().isActiveMaster()) {
116 assertEquals(activeName
, masterThreads
.get(i
).getMaster().getServerName());
118 active
= masterThreads
.get(activeIndex
).getMaster();
121 assertEquals(1, numActive
);
122 assertEquals(2, masterThreads
.size());
123 int rsCount
= masterThreads
.get(activeIndex
).getMaster().getClusterMetrics()
124 .getLiveServerMetrics().size();
125 LOG
.info("Active master " + active
.getServerName() + " managing " + rsCount
+
127 assertEquals(3, rsCount
);
129 // wait for the active master to acknowledge loss of the backup from ZK
130 final HMaster activeFinal
= active
;
132 TimeUnit
.MINUTES
.toMillis(5), () -> activeFinal
.getBackupMasters().size() == 1);
134 // Check that ClusterStatus reports the correct active and backup masters
135 assertNotNull(active
);
136 status
= active
.getClusterMetrics();
137 assertEquals(activeName
, status
.getMasterName());
138 assertEquals(1, status
.getBackupMasterNames().size());
140 // kill the active master
141 LOG
.debug("\n\nStopping the active master " + active
.getServerName() + "\n");
142 cluster
.stopMaster(activeIndex
, false);
143 cluster
.waitOnMaster(activeIndex
);
145 // wait for an active master to show up and be ready
146 assertTrue(cluster
.waitForActiveAndReadyMaster());
148 LOG
.debug("\n\nVerifying backup master is now active\n");
149 // should only have one master now
150 assertEquals(1, masterThreads
.size());
152 // and he should be active
153 active
= masterThreads
.get(0).getMaster();
154 assertNotNull(active
);
155 status
= active
.getClusterMetrics();
156 ServerName masterName
= status
.getMasterName();
157 assertNotNull(masterName
);
158 assertEquals(active
.getServerName(), masterName
);
159 assertTrue(active
.isActiveMaster());
160 assertEquals(0, status
.getBackupMasterNames().size());
161 int rss
= status
.getLiveServerMetrics().size();
162 LOG
.info("Active master {} managing {} region servers", masterName
.getServerName(), rss
);
163 assertEquals(3, rss
);
166 TEST_UTIL
.shutdownMiniCluster();
171 * Test meta in transition when master failover.
172 * This test used to manipulate region state up in zk. That is not allowed any more in hbase2
173 * so I removed that messing. That makes this test anemic.
176 public void testMetaInTransitionWhenMasterFailover() throws Exception
{
178 HBaseTestingUtil TEST_UTIL
= new HBaseTestingUtil();
179 TEST_UTIL
.startMiniCluster();
181 SingleProcessHBaseCluster cluster
= TEST_UTIL
.getHBaseCluster();
182 LOG
.info("Cluster started");
184 HMaster activeMaster
= cluster
.getMaster();
185 ServerName metaServerName
= cluster
.getServerHoldingMeta();
186 HRegionServer hrs
= cluster
.getRegionServer(metaServerName
);
188 // Now kill master, meta should remain on rs, where we placed it before.
189 LOG
.info("Aborting master");
190 activeMaster
.abort("test-kill");
191 cluster
.waitForMasterToStop(activeMaster
.getServerName(), 30000);
192 LOG
.info("Master has aborted");
194 // meta should remain where it was
195 RegionState metaState
= MetaTableLocator
.getMetaRegionState(hrs
.getZooKeeper());
196 assertEquals("hbase:meta should be online on RS",
197 metaState
.getServerName(), metaServerName
);
198 assertEquals("hbase:meta should be online on RS", State
.OPEN
, metaState
.getState());
200 // Start up a new master
201 LOG
.info("Starting up a new master");
202 activeMaster
= cluster
.startMaster().getMaster();
203 LOG
.info("Waiting for master to be ready");
204 cluster
.waitForActiveAndReadyMaster();
205 LOG
.info("Master is ready");
207 // ensure meta is still deployed on RS
208 metaState
= MetaTableLocator
.getMetaRegionState(activeMaster
.getZooKeeper());
209 assertEquals("hbase:meta should be online on RS",
210 metaState
.getServerName(), metaServerName
);
211 assertEquals("hbase:meta should be online on RS", State
.OPEN
, metaState
.getState());
213 // Done, shutdown the cluster
215 TEST_UTIL
.shutdownMiniCluster();