2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
;
20 import java
.io
.Closeable
;
21 import java
.io
.IOException
;
22 import org
.apache
.hadoop
.conf
.Configurable
;
23 import org
.apache
.hadoop
.conf
.Configuration
;
24 import org
.apache
.hadoop
.hbase
.client
.RegionInfoBuilder
;
25 import org
.apache
.hadoop
.hbase
.util
.EnvironmentEdgeManager
;
26 import org
.apache
.hadoop
.hbase
.util
.Threads
;
27 import org
.apache
.yetus
.audience
.InterfaceAudience
;
28 import org
.apache
.yetus
.audience
.InterfaceStability
;
31 * This class defines methods that can help with managing HBase clusters from unit tests and system
32 * tests. There are 3 types of cluster deployments:
34 * <li><b>SingleProcessHBaseCluster:</b> each server is run in the same JVM in separate threads,
35 * used by unit tests</li>
36 * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can
37 * interact with the cluster.</li>
38 * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate JVMs.
42 * HBaseCluster unifies the way tests interact with the cluster, so that the same test can be run
43 * against a mini-cluster during unit test execution, or a distributed cluster having tens/hundreds
44 * of nodes during execution of integration tests.
46 * HBaseCluster exposes client-side public interfaces to tests, so that tests do not assume
47 * running in a particular mode. Not all the tests are suitable to be run on an actual cluster, and
48 * some tests will still need to mock stuff and introspect internal state. For those use cases from
49 * unit tests, or if more control is needed, you can use the subclasses directly. In that sense,
50 * this class does not abstract away <strong>every</strong> interface that SingleProcessHBaseCluster
51 * or DistributedHBaseCluster provide.
53 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience
.PHOENIX
)
54 @InterfaceStability.Evolving
55 public abstract class HBaseClusterInterface
implements Closeable
, Configurable
{
57 protected Configuration conf
;
59 /** the status of the cluster before we begin */
60 protected ClusterMetrics initialClusterStatus
;
63 * Construct an HBaseCluster
64 * @param conf Configuration to be used for cluster
66 public HBaseClusterInterface(Configuration conf
) {
71 public void setConf(Configuration conf
) {
76 public Configuration
getConf() {
81 * Returns a ClusterMetrics for this HBase cluster.
82 * @see #getInitialClusterMetrics()
84 public abstract ClusterMetrics
getClusterMetrics() throws IOException
;
87 * Returns a ClusterStatus for this HBase cluster as observed at the starting of the HBaseCluster
89 public ClusterMetrics
getInitialClusterMetrics() throws IOException
{
90 return initialClusterStatus
;
94 * Starts a new region server on the given hostname or if this is a mini/local cluster, starts a
95 * region server locally.
96 * @param hostname the hostname to start the regionserver on
97 * @throws IOException if something goes wrong
99 public abstract void startRegionServer(String hostname
, int port
) throws IOException
;
102 * Kills the region server process if this is a distributed cluster, otherwise this causes the
103 * region server to exit doing basic clean up only.
104 * @throws IOException if something goes wrong
106 public abstract void killRegionServer(ServerName serverName
) throws IOException
;
109 * Keeping track of killed servers and being able to check if a particular server was killed makes
110 * it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete
111 * example of such case is - killing servers and waiting for all regions of a particular table to
112 * be assigned. We can check for server column in META table and that its value is not one of the
115 public abstract boolean isKilledRS(ServerName serverName
);
118 * Stops the given region server, by attempting a gradual stop.
119 * @throws IOException if something goes wrong
121 public abstract void stopRegionServer(ServerName serverName
) throws IOException
;
124 * Wait for the specified region server to join the cluster
125 * @throws IOException if something goes wrong or timeout occurs
127 public void waitForRegionServerToStart(String hostname
, int port
, long timeout
)
129 long start
= EnvironmentEdgeManager
.currentTime();
130 while ((EnvironmentEdgeManager
.currentTime() - start
) < timeout
) {
131 for (ServerName server
: getClusterMetrics().getLiveServerMetrics().keySet()) {
132 if (server
.getHostname().equals(hostname
) && server
.getPort() == port
) {
138 throw new IOException(
139 "did timeout " + timeout
+ "ms waiting for region server to start: " + hostname
);
143 * Wait for the specified region server to stop the thread / process.
144 * @throws IOException if something goes wrong or timeout occurs
146 public abstract void waitForRegionServerToStop(ServerName serverName
, long timeout
)
150 * Suspend the region server
151 * @param serverName the hostname to suspend the regionserver on
152 * @throws IOException if something goes wrong
154 public abstract void suspendRegionServer(ServerName serverName
) throws IOException
;
157 * Resume the region server
158 * @param serverName the hostname to resume the regionserver on
159 * @throws IOException if something goes wrong
161 public abstract void resumeRegionServer(ServerName serverName
) throws IOException
;
164 * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster, silently
165 * logs warning message.
166 * @param hostname the hostname to start the regionserver on
167 * @throws IOException if something goes wrong
169 public abstract void startZkNode(String hostname
, int port
) throws IOException
;
172 * Kills the zookeeper node process if this is a distributed cluster, otherwise, this causes
173 * master to exit doing basic clean up only.
174 * @throws IOException if something goes wrong
176 public abstract void killZkNode(ServerName serverName
) throws IOException
;
179 * Stops the region zookeeper if this is a distributed cluster, otherwise silently logs warning
181 * @throws IOException if something goes wrong
183 public abstract void stopZkNode(ServerName serverName
) throws IOException
;
186 * Wait for the specified zookeeper node to join the cluster
187 * @throws IOException if something goes wrong or timeout occurs
189 public abstract void waitForZkNodeToStart(ServerName serverName
, long timeout
) throws IOException
;
192 * Wait for the specified zookeeper node to stop the thread / process.
193 * @throws IOException if something goes wrong or timeout occurs
195 public abstract void waitForZkNodeToStop(ServerName serverName
, long timeout
) throws IOException
;
198 * Starts a new datanode on the given hostname or if this is a mini/local cluster, silently logs
200 * @throws IOException if something goes wrong
202 public abstract void startDataNode(ServerName serverName
) throws IOException
;
205 * Kills the datanode process if this is a distributed cluster, otherwise, this causes master to
206 * exit doing basic clean up only.
207 * @throws IOException if something goes wrong
209 public abstract void killDataNode(ServerName serverName
) throws IOException
;
212 * Stops the datanode if this is a distributed cluster, otherwise silently logs warning message.
213 * @throws IOException if something goes wrong
215 public abstract void stopDataNode(ServerName serverName
) throws IOException
;
218 * Wait for the specified datanode to join the cluster
219 * @throws IOException if something goes wrong or timeout occurs
221 public abstract void waitForDataNodeToStart(ServerName serverName
, long timeout
)
225 * Wait for the specified datanode to stop the thread / process.
226 * @throws IOException if something goes wrong or timeout occurs
228 public abstract void waitForDataNodeToStop(ServerName serverName
, long timeout
)
232 * Starts a new namenode on the given hostname or if this is a mini/local cluster, silently logs
234 * @throws IOException if something goes wrong
236 public abstract void startNameNode(ServerName serverName
) throws IOException
;
239 * Kills the namenode process if this is a distributed cluster, otherwise, this causes master to
240 * exit doing basic clean up only.
241 * @throws IOException if something goes wrong
243 public abstract void killNameNode(ServerName serverName
) throws IOException
;
246 * Stops the namenode if this is a distributed cluster, otherwise silently logs warning message.
247 * @throws IOException if something goes wrong
249 public abstract void stopNameNode(ServerName serverName
) throws IOException
;
252 * Wait for the specified namenode to join the cluster
253 * @throws IOException if something goes wrong or timeout occurs
255 public abstract void waitForNameNodeToStart(ServerName serverName
, long timeout
)
259 * Wait for the specified namenode to stop
260 * @throws IOException if something goes wrong or timeout occurs
262 public abstract void waitForNameNodeToStop(ServerName serverName
, long timeout
)
266 * Starts a new master on the given hostname or if this is a mini/local cluster, starts a master
268 * @param hostname the hostname to start the master on
269 * @throws IOException if something goes wrong
271 public abstract void startMaster(String hostname
, int port
) throws IOException
;
274 * Kills the master process if this is a distributed cluster, otherwise, this causes master to
275 * exit doing basic clean up only.
276 * @throws IOException if something goes wrong
278 public abstract void killMaster(ServerName serverName
) throws IOException
;
281 * Stops the given master, by attempting a gradual stop.
282 * @throws IOException if something goes wrong
284 public abstract void stopMaster(ServerName serverName
) throws IOException
;
287 * Wait for the specified master to stop the thread / process.
288 * @throws IOException if something goes wrong or timeout occurs
290 public abstract void waitForMasterToStop(ServerName serverName
, long timeout
) throws IOException
;
293 * Blocks until there is an active master and that master has completed initialization.
294 * @return true if an active master becomes available. false if there are no masters left.
295 * @throws IOException if something goes wrong or timeout occurs
297 public boolean waitForActiveAndReadyMaster() throws IOException
{
298 return waitForActiveAndReadyMaster(Long
.MAX_VALUE
);
302 * Blocks until there is an active master and that master has completed initialization.
303 * @param timeout the timeout limit in ms
304 * @return true if an active master becomes available. false if there are no masters left.
306 public abstract boolean waitForActiveAndReadyMaster(long timeout
) throws IOException
;
309 * Wait for HBase Cluster to shut down.
311 public abstract void waitUntilShutDown() throws IOException
;
314 * Shut down the HBase cluster
316 public abstract void shutdown() throws IOException
;
319 * Restores the cluster to it's initial state if this is a real cluster, otherwise does nothing.
320 * This is a best effort restore. If the servers are not reachable, or insufficient permissions,
321 * etc. restoration might be partial.
322 * @return whether restoration is complete
324 public boolean restoreInitialStatus() throws IOException
{
325 return restoreClusterMetrics(getInitialClusterMetrics());
329 * Restores the cluster to given state if this is a real cluster, otherwise does nothing. This is
330 * a best effort restore. If the servers are not reachable, or insufficient permissions, etc.
331 * restoration might be partial.
332 * @return whether restoration is complete
334 public boolean restoreClusterMetrics(ClusterMetrics desiredStatus
) throws IOException
{
339 * Get the ServerName of region server serving the first hbase:meta region
341 public ServerName
getServerHoldingMeta() throws IOException
{
342 return getServerHoldingRegion(TableName
.META_TABLE_NAME
,
343 RegionInfoBuilder
.FIRST_META_REGIONINFO
.getRegionName());
347 * Get the ServerName of region server serving the specified region
348 * @param regionName Name of the region in bytes
349 * @param tn Table name that has the region.
350 * @return ServerName that hosts the region or null
352 public abstract ServerName
getServerHoldingRegion(final TableName tn
, byte[] regionName
)
356 * @return whether we are interacting with a distributed cluster as opposed to an in-process
357 * mini/local cluster.
359 public boolean isDistributedCluster() {
364 * Closes all the resources held open for this cluster. Note that this call does not shutdown the
369 public abstract void close() throws IOException
;
372 * Wait for the namenode.
374 public void waitForNamenodeAvailable() throws InterruptedException
{
377 public void waitForDatanodesRegistered(int nbDN
) throws Exception
{