2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
;
20 import java
.io
.Closeable
;
21 import java
.io
.IOException
;
22 import org
.apache
.hadoop
.conf
.Configurable
;
23 import org
.apache
.hadoop
.conf
.Configuration
;
24 import org
.apache
.hadoop
.hbase
.client
.RegionInfoBuilder
;
25 import org
.apache
.hadoop
.hbase
.util
.Threads
;
26 import org
.apache
.yetus
.audience
.InterfaceAudience
;
27 import org
.slf4j
.Logger
;
28 import org
.slf4j
.LoggerFactory
;
31 * This class defines methods that can help with managing HBase clusters
32 * from unit tests and system tests. There are 3 types of cluster deployments:
34 * <li><b>MiniHBaseCluster:</b> each server is run in the same JVM in separate threads,
35 * used by unit tests</li>
36 * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can
37 * interact with the cluster. </li>
38 * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate
42 * HBaseCluster unifies the way tests interact with the cluster, so that the same test can
43 * be run against a mini-cluster during unit test execution, or a distributed cluster having
44 * tens/hundreds of nodes during execution of integration tests.
47 * HBaseCluster exposes client-side public interfaces to tests, so that tests do not assume
48 * running in a particular mode. Not all the tests are suitable to be run on an actual cluster,
49 * and some tests will still need to mock stuff and introspect internal state. For those use
50 * cases from unit tests, or if more control is needed, you can use the subclasses directly.
51 * In that sense, this class does not abstract away <strong>every</strong> interface that
52 * MiniHBaseCluster or DistributedHBaseCluster provide.
54 @InterfaceAudience.Private
55 public abstract class HBaseCluster
implements Closeable
, Configurable
{
56 // Log is being used in DistributedHBaseCluster class, hence keeping it as package scope
57 static final Logger LOG
= LoggerFactory
.getLogger(HBaseCluster
.class.getName());
58 protected Configuration conf
;
60 /** the status of the cluster before we begin */
61 protected ClusterMetrics initialClusterStatus
;
64 * Construct an HBaseCluster
65 * @param conf Configuration to be used for cluster
67 public HBaseCluster(Configuration conf
) {
72 public void setConf(Configuration conf
) {
77 public Configuration
getConf() {
82 * Returns a ClusterMetrics for this HBase cluster.
83 * @see #getInitialClusterMetrics()
85 public abstract ClusterMetrics
getClusterMetrics() throws IOException
;
88 * Returns a ClusterMetrics for this HBase cluster as observed at the
89 * start of the HBaseCluster
91 public ClusterMetrics
getInitialClusterMetrics() throws IOException
{
92 return initialClusterStatus
;
96 * Starts a new region server on the given hostname or if this is a mini/local cluster,
97 * starts a region server locally.
98 * @param hostname the hostname to start the regionserver on
99 * @throws IOException if something goes wrong
101 public abstract void startRegionServer(String hostname
, int port
) throws IOException
;
104 * Kills the region server process if this is a distributed cluster, otherwise
105 * this causes the region server to exit doing basic clean up only.
106 * @throws IOException if something goes wrong
108 public abstract void killRegionServer(ServerName serverName
) throws IOException
;
111 * Keeping track of killed servers and being able to check if a particular server was killed makes
112 * it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete
113 * example of such case is - killing servers and waiting for all regions of a particular table
114 * to be assigned. We can check for server column in META table and that its value is not one
115 * of the killed servers.
117 public abstract boolean isKilledRS(ServerName serverName
);
120 * Stops the given region server, by attempting a gradual stop.
121 * @throws IOException if something goes wrong
123 public abstract void stopRegionServer(ServerName serverName
) throws IOException
;
126 * Wait for the specified region server to join the cluster
127 * @throws IOException if something goes wrong or timeout occurs
129 public void waitForRegionServerToStart(String hostname
, int port
, long timeout
)
131 long start
= System
.currentTimeMillis();
132 while ((System
.currentTimeMillis() - start
) < timeout
) {
133 for (ServerName server
: getClusterMetrics().getLiveServerMetrics().keySet()) {
134 if (server
.getHostname().equals(hostname
) && server
.getPort() == port
) {
140 throw new IOException("did timeout " + timeout
+ "ms waiting for region server to start: "
145 * Wait for the specified region server to stop the thread / process.
146 * @throws IOException if something goes wrong or timeout occurs
148 public abstract void waitForRegionServerToStop(ServerName serverName
, long timeout
)
152 * Suspend the region server
153 * @param serverName the hostname to suspend the regionserver on
154 * @throws IOException if something goes wrong
156 public abstract void suspendRegionServer(ServerName serverName
) throws IOException
;
159 * Resume the region server
160 * @param serverName the hostname to resume the regionserver on
161 * @throws IOException if something goes wrong
163 public abstract void resumeRegionServer(ServerName serverName
) throws IOException
;
166 * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster,
167 * silently logs warning message.
168 * @param hostname the hostname to start the regionserver on
169 * @throws IOException if something goes wrong
171 public abstract void startZkNode(String hostname
, int port
) throws IOException
;
174 * Kills the zookeeper node process if this is a distributed cluster, otherwise,
175 * this causes master to exit doing basic clean up only.
176 * @throws IOException if something goes wrong
178 public abstract void killZkNode(ServerName serverName
) throws IOException
;
181 * Stops the region zookeeper if this is a distributed cluster, otherwise
182 * silently logs warning message.
183 * @throws IOException if something goes wrong
185 public abstract void stopZkNode(ServerName serverName
) throws IOException
;
188 * Wait for the specified zookeeper node to join the cluster
189 * @throws IOException if something goes wrong or timeout occurs
191 public abstract void waitForZkNodeToStart(ServerName serverName
, long timeout
)
195 * Wait for the specified zookeeper node to stop the thread / process.
196 * @throws IOException if something goes wrong or timeout occurs
198 public abstract void waitForZkNodeToStop(ServerName serverName
, long timeout
)
202 * Starts a new datanode on the given hostname or if this is a mini/local cluster,
203 * silently logs warning message.
204 * @throws IOException if something goes wrong
206 public abstract void startDataNode(ServerName serverName
) throws IOException
;
209 * Kills the datanode process if this is a distributed cluster, otherwise,
210 * this causes master to exit doing basic clean up only.
211 * @throws IOException if something goes wrong
213 public abstract void killDataNode(ServerName serverName
) throws IOException
;
216 * Stops the datanode if this is a distributed cluster, otherwise
217 * silently logs warning message.
218 * @throws IOException if something goes wrong
220 public abstract void stopDataNode(ServerName serverName
) throws IOException
;
223 * Wait for the specified datanode to join the cluster
224 * @throws IOException if something goes wrong or timeout occurs
226 public abstract void waitForDataNodeToStart(ServerName serverName
, long timeout
)
230 * Wait for the specified datanode to stop the thread / process.
231 * @throws IOException if something goes wrong or timeout occurs
233 public abstract void waitForDataNodeToStop(ServerName serverName
, long timeout
)
237 * Starts a new namenode on the given hostname or if this is a mini/local cluster, silently logs
239 * @throws IOException if something goes wrong
241 public abstract void startNameNode(ServerName serverName
) throws IOException
;
244 * Kills the namenode process if this is a distributed cluster, otherwise, this causes master to
245 * exit doing basic clean up only.
246 * @throws IOException if something goes wrong
248 public abstract void killNameNode(ServerName serverName
) throws IOException
;
251 * Stops the namenode if this is a distributed cluster, otherwise silently logs warning message.
252 * @throws IOException if something goes wrong
254 public abstract void stopNameNode(ServerName serverName
) throws IOException
;
257 * Wait for the specified namenode to join the cluster
258 * @throws IOException if something goes wrong or timeout occurs
260 public abstract void waitForNameNodeToStart(ServerName serverName
, long timeout
)
264 * Wait for the specified namenode to stop
265 * @throws IOException if something goes wrong or timeout occurs
267 public abstract void waitForNameNodeToStop(ServerName serverName
, long timeout
)
271 * Starts a new master on the given hostname or if this is a mini/local cluster,
272 * starts a master locally.
273 * @param hostname the hostname to start the master on
274 * @throws IOException if something goes wrong
276 public abstract void startMaster(String hostname
, int port
) throws IOException
;
279 * Kills the master process if this is a distributed cluster, otherwise,
280 * this causes master to exit doing basic clean up only.
281 * @throws IOException if something goes wrong
283 public abstract void killMaster(ServerName serverName
) throws IOException
;
286 * Stops the given master, by attempting a gradual stop.
287 * @throws IOException if something goes wrong
289 public abstract void stopMaster(ServerName serverName
) throws IOException
;
292 * Wait for the specified master to stop the thread / process.
293 * @throws IOException if something goes wrong or timeout occurs
295 public abstract void waitForMasterToStop(ServerName serverName
, long timeout
)
299 * Blocks until there is an active master and that master has completed
302 * @return true if an active master becomes available. false if there are no
304 * @throws IOException if something goes wrong or timeout occurs
306 public boolean waitForActiveAndReadyMaster()
308 return waitForActiveAndReadyMaster(Long
.MAX_VALUE
);
312 * Blocks until there is an active master and that master has completed
314 * @param timeout the timeout limit in ms
315 * @return true if an active master becomes available. false if there are no
318 public abstract boolean waitForActiveAndReadyMaster(long timeout
)
322 * Wait for HBase Cluster to shut down.
324 public abstract void waitUntilShutDown() throws IOException
;
327 * Shut down the HBase cluster
329 public abstract void shutdown() throws IOException
;
332 * Restores the cluster to its initial state if this is a real cluster,
333 * otherwise does nothing.
334 * This is a best effort restore. If the servers are not reachable, or insufficient
335 * permissions, etc. restoration might be partial.
336 * @return whether restoration is complete
338 public boolean restoreInitialStatus() throws IOException
{
339 return restoreClusterMetrics(getInitialClusterMetrics());
343 * Restores the cluster to given state if this is a real cluster,
344 * otherwise does nothing.
345 * This is a best effort restore. If the servers are not reachable, or insufficient
346 * permissions, etc. restoration might be partial.
347 * @return whether restoration is complete
349 public boolean restoreClusterMetrics(ClusterMetrics desiredStatus
) throws IOException
{
354 * Get the ServerName of region server serving the first hbase:meta region
356 public ServerName
getServerHoldingMeta() throws IOException
{
357 return getServerHoldingRegion(TableName
.META_TABLE_NAME
,
358 RegionInfoBuilder
.FIRST_META_REGIONINFO
.getRegionName());
362 * Get the ServerName of region server serving the specified region
363 * @param regionName Name of the region in bytes
364 * @param tn Table name that has the region.
365 * @return ServerName that hosts the region or null
367 public abstract ServerName
getServerHoldingRegion(final TableName tn
, byte[] regionName
)
371 * @return whether we are interacting with a distributed cluster as opposed to an
372 * in-process mini/local cluster.
374 public boolean isDistributedCluster() {
379 * Closes all the resources held open for this cluster. Note that this call does not shutdown
384 public abstract void close() throws IOException
;
387 * Wait for the namenode.
389 * @throws InterruptedException
391 public void waitForNamenodeAvailable() throws InterruptedException
{
394 public void waitForDatanodesRegistered(int nbDN
) throws Exception
{