HBASE-24033 Add ut for loading the corrupt recovered hfiles (#1322)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / HBaseCluster.java
blob85dff357ca620cc0ce28a3c5ad671ba3e416ab7d
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase;
20 import java.io.Closeable;
21 import java.io.IOException;
22 import org.apache.hadoop.conf.Configurable;
23 import org.apache.hadoop.conf.Configuration;
24 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
25 import org.apache.hadoop.hbase.util.Threads;
26 import org.apache.yetus.audience.InterfaceAudience;
27 import org.slf4j.Logger;
28 import org.slf4j.LoggerFactory;
30 /**
31 * This class defines methods that can help with managing HBase clusters
32 * from unit tests and system tests. There are 3 types of cluster deployments:
33 * <ul>
34 * <li><b>MiniHBaseCluster:</b> each server is run in the same JVM in separate threads,
35 * used by unit tests</li>
36 * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can
37 * interact with the cluster. </li>
38 * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate
39 * JVMs. </li>
40 * </ul>
41 * <p>
42 * HBaseCluster unifies the way tests interact with the cluster, so that the same test can
43 * be run against a mini-cluster during unit test execution, or a distributed cluster having
44 * tens/hundreds of nodes during execution of integration tests.
46 * <p>
47 * HBaseCluster exposes client-side public interfaces to tests, so that tests do not assume
48 * running in a particular mode. Not all the tests are suitable to be run on an actual cluster,
49 * and some tests will still need to mock stuff and introspect internal state. For those use
50 * cases from unit tests, or if more control is needed, you can use the subclasses directly.
51 * In that sense, this class does not abstract away <strong>every</strong> interface that
52 * MiniHBaseCluster or DistributedHBaseCluster provide.
54 @InterfaceAudience.Private
55 public abstract class HBaseCluster implements Closeable, Configurable {
56 // Log is being used in DistributedHBaseCluster class, hence keeping it as package scope
57 static final Logger LOG = LoggerFactory.getLogger(HBaseCluster.class.getName());
58 protected Configuration conf;
60 /** the status of the cluster before we begin */
61 protected ClusterMetrics initialClusterStatus;
/**
 * Creates an HBaseCluster that uses the supplied configuration.
 * @param conf Configuration to be used for cluster
 */
public HBaseCluster(Configuration conf) {
  // Routed through the setter so the field is assigned in exactly one place.
  this.setConf(conf);
}
71 @Override
72 public void setConf(Configuration conf) {
73 this.conf = conf;
76 @Override
77 public Configuration getConf() {
78 return conf;
81 /**
82 * Returns a ClusterMetrics for this HBase cluster.
83 * @see #getInitialClusterMetrics()
85 public abstract ClusterMetrics getClusterMetrics() throws IOException;
87 /**
88 * Returns a ClusterStatus for this HBase cluster as observed at the
89 * starting of the HBaseCluster
91 public ClusterMetrics getInitialClusterMetrics() throws IOException {
92 return initialClusterStatus;
95 /**
96 * Starts a new region server on the given hostname or if this is a mini/local cluster,
97 * starts a region server locally.
98 * @param hostname the hostname to start the regionserver on
99 * @throws IOException if something goes wrong
101 public abstract void startRegionServer(String hostname, int port) throws IOException;
104 * Kills the region server process if this is a distributed cluster, otherwise
105 * this causes the region server to exit doing basic clean up only.
106 * @throws IOException if something goes wrong
108 public abstract void killRegionServer(ServerName serverName) throws IOException;
111 * Keeping track of killed servers and being able to check if a particular server was killed makes
112 * it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete
113 * example of such case is - killing servers and waiting for all regions of a particular table
114 * to be assigned. We can check for server column in META table and that its value is not one
115 * of the killed servers.
117 public abstract boolean isKilledRS(ServerName serverName);
120 * Stops the given region server, by attempting a gradual stop.
121 * @throws IOException if something goes wrong
123 public abstract void stopRegionServer(ServerName serverName) throws IOException;
126 * Wait for the specified region server to join the cluster
127 * @throws IOException if something goes wrong or timeout occurs
129 public void waitForRegionServerToStart(String hostname, int port, long timeout)
130 throws IOException {
131 long start = System.currentTimeMillis();
132 while ((System.currentTimeMillis() - start) < timeout) {
133 for (ServerName server : getClusterMetrics().getLiveServerMetrics().keySet()) {
134 if (server.getHostname().equals(hostname) && server.getPort() == port) {
135 return;
138 Threads.sleep(100);
140 throw new IOException("did timeout " + timeout + "ms waiting for region server to start: "
141 + hostname);
145 * Wait for the specified region server to stop the thread / process.
146 * @throws IOException if something goes wrong or timeout occurs
148 public abstract void waitForRegionServerToStop(ServerName serverName, long timeout)
149 throws IOException;
152 * Suspend the region server
153 * @param serverName the name of the region server to suspend
154 * @throws IOException if something goes wrong
156 public abstract void suspendRegionServer(ServerName serverName) throws IOException;
159 * Resume the region server
160 * @param serverName the name of the region server to resume
161 * @throws IOException if something goes wrong
163 public abstract void resumeRegionServer(ServerName serverName) throws IOException;
166 * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster,
167 * silently logs warning message.
168 * @param hostname the hostname to start the zookeeper node on
169 * @throws IOException if something goes wrong
171 public abstract void startZkNode(String hostname, int port) throws IOException;
174 * Kills the zookeeper node process if this is a distributed cluster, otherwise,
175 * this causes master to exit doing basic clean up only.
176 * @throws IOException if something goes wrong
178 public abstract void killZkNode(ServerName serverName) throws IOException;
181 * Stops the zookeeper node if this is a distributed cluster, otherwise
182 * silently logs warning message.
183 * @throws IOException if something goes wrong
185 public abstract void stopZkNode(ServerName serverName) throws IOException;
188 * Wait for the specified zookeeper node to join the cluster
189 * @throws IOException if something goes wrong or timeout occurs
191 public abstract void waitForZkNodeToStart(ServerName serverName, long timeout)
192 throws IOException;
195 * Wait for the specified zookeeper node to stop the thread / process.
196 * @throws IOException if something goes wrong or timeout occurs
198 public abstract void waitForZkNodeToStop(ServerName serverName, long timeout)
199 throws IOException;
202 * Starts a new datanode on the given hostname or if this is a mini/local cluster,
203 * silently logs warning message.
204 * @throws IOException if something goes wrong
206 public abstract void startDataNode(ServerName serverName) throws IOException;
209 * Kills the datanode process if this is a distributed cluster, otherwise,
210 * this causes master to exit doing basic clean up only.
211 * @throws IOException if something goes wrong
213 public abstract void killDataNode(ServerName serverName) throws IOException;
216 * Stops the datanode if this is a distributed cluster, otherwise
217 * silently logs warning message.
218 * @throws IOException if something goes wrong
220 public abstract void stopDataNode(ServerName serverName) throws IOException;
223 * Wait for the specified datanode to join the cluster
224 * @throws IOException if something goes wrong or timeout occurs
226 public abstract void waitForDataNodeToStart(ServerName serverName, long timeout)
227 throws IOException;
230 * Wait for the specified datanode to stop the thread / process.
231 * @throws IOException if something goes wrong or timeout occurs
233 public abstract void waitForDataNodeToStop(ServerName serverName, long timeout)
234 throws IOException;
237 * Starts a new namenode on the given hostname or if this is a mini/local cluster, silently logs
238 * warning message.
239 * @throws IOException if something goes wrong
241 public abstract void startNameNode(ServerName serverName) throws IOException;
244 * Kills the namenode process if this is a distributed cluster, otherwise, this causes master to
245 * exit doing basic clean up only.
246 * @throws IOException if something goes wrong
248 public abstract void killNameNode(ServerName serverName) throws IOException;
251 * Stops the namenode if this is a distributed cluster, otherwise silently logs warning message.
252 * @throws IOException if something goes wrong
254 public abstract void stopNameNode(ServerName serverName) throws IOException;
257 * Wait for the specified namenode to join the cluster
258 * @throws IOException if something goes wrong or timeout occurs
260 public abstract void waitForNameNodeToStart(ServerName serverName, long timeout)
261 throws IOException;
264 * Wait for the specified namenode to stop
265 * @throws IOException if something goes wrong or timeout occurs
267 public abstract void waitForNameNodeToStop(ServerName serverName, long timeout)
268 throws IOException;
271 * Starts a new master on the given hostname or if this is a mini/local cluster,
272 * starts a master locally.
273 * @param hostname the hostname to start the master on
274 * @throws IOException if something goes wrong
276 public abstract void startMaster(String hostname, int port) throws IOException;
279 * Kills the master process if this is a distributed cluster, otherwise,
280 * this causes master to exit doing basic clean up only.
281 * @throws IOException if something goes wrong
283 public abstract void killMaster(ServerName serverName) throws IOException;
286 * Stops the given master, by attempting a gradual stop.
287 * @throws IOException if something goes wrong
289 public abstract void stopMaster(ServerName serverName) throws IOException;
292 * Wait for the specified master to stop the thread / process.
293 * @throws IOException if something goes wrong or timeout occurs
295 public abstract void waitForMasterToStop(ServerName serverName, long timeout)
296 throws IOException;
299 * Blocks until there is an active master and that master has completed
300 * initialization.
302 * @return true if an active master becomes available. false if there are no
303 * masters left.
304 * @throws IOException if something goes wrong or timeout occurs
306 public boolean waitForActiveAndReadyMaster()
307 throws IOException {
308 return waitForActiveAndReadyMaster(Long.MAX_VALUE);
312 * Blocks until there is an active master and that master has completed
313 * initialization.
314 * @param timeout the timeout limit in ms
315 * @return true if an active master becomes available. false if there are no
316 * masters left.
318 public abstract boolean waitForActiveAndReadyMaster(long timeout)
319 throws IOException;
322 * Wait for HBase Cluster to shut down.
324 public abstract void waitUntilShutDown() throws IOException;
327 * Shut down the HBase cluster
329 public abstract void shutdown() throws IOException;
332 * Restores the cluster to it's initial state if this is a real cluster,
333 * otherwise does nothing.
334 * This is a best effort restore. If the servers are not reachable, or insufficient
335 * permissions, etc. restoration might be partial.
336 * @return whether restoration is complete
338 public boolean restoreInitialStatus() throws IOException {
339 return restoreClusterMetrics(getInitialClusterMetrics());
343 * Restores the cluster to given state if this is a real cluster,
344 * otherwise does nothing.
345 * This is a best effort restore. If the servers are not reachable, or insufficient
346 * permissions, etc. restoration might be partial.
347 * @return whether restoration is complete
349 public boolean restoreClusterMetrics(ClusterMetrics desiredStatus) throws IOException {
350 return true;
354 * Get the ServerName of region server serving the first hbase:meta region
356 public ServerName getServerHoldingMeta() throws IOException {
357 return getServerHoldingRegion(TableName.META_TABLE_NAME,
358 RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName());
362 * Get the ServerName of region server serving the specified region
363 * @param regionName Name of the region in bytes
364 * @param tn Table name that has the region.
365 * @return ServerName that hosts the region or null
367 public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName)
368 throws IOException;
371 * @return whether we are interacting with a distributed cluster as opposed to an
372 * in-process mini/local cluster.
374 public boolean isDistributedCluster() {
375 return false;
379 * Closes all the resources held open for this cluster. Note that this call does not shutdown
380 * the cluster.
381 * @see #shutdown()
383 @Override
384 public abstract void close() throws IOException;
387 * Wait for the namenode.
389 * @throws InterruptedException
391 public void waitForNamenodeAvailable() throws InterruptedException {
394 public void waitForDatanodesRegistered(int nbDN) throws Exception {