HBASE-23864 No need to submit SplitTableRegionProcedure/MergeTableRegionsProcedure...
[hbase.git] / hbase-server / src / main / java / org / apache / hadoop / hbase / master / HMaster.java
blobedc44b8e1ce1dde09b63a1827ff3fc9717c312aa
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase.master;
20 import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK;
21 import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS;
22 import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;
24 import com.google.protobuf.Descriptors;
25 import com.google.protobuf.Service;
26 import java.io.IOException;
27 import java.io.InterruptedIOException;
28 import java.lang.reflect.Constructor;
29 import java.lang.reflect.InvocationTargetException;
30 import java.net.InetAddress;
31 import java.net.InetSocketAddress;
32 import java.net.UnknownHostException;
33 import java.util.ArrayList;
34 import java.util.Arrays;
35 import java.util.Collection;
36 import java.util.Collections;
37 import java.util.Comparator;
38 import java.util.EnumSet;
39 import java.util.HashMap;
40 import java.util.Iterator;
41 import java.util.List;
42 import java.util.Map;
43 import java.util.Map.Entry;
44 import java.util.Objects;
45 import java.util.Optional;
46 import java.util.Set;
47 import java.util.concurrent.ExecutionException;
48 import java.util.concurrent.Future;
49 import java.util.concurrent.TimeUnit;
50 import java.util.concurrent.TimeoutException;
51 import java.util.concurrent.atomic.AtomicInteger;
52 import java.util.function.Function;
53 import java.util.regex.Pattern;
54 import java.util.stream.Collectors;
55 import javax.servlet.ServletException;
56 import javax.servlet.http.HttpServlet;
57 import javax.servlet.http.HttpServletRequest;
58 import javax.servlet.http.HttpServletResponse;
59 import org.apache.commons.lang3.StringUtils;
60 import org.apache.hadoop.conf.Configuration;
61 import org.apache.hadoop.fs.Path;
62 import org.apache.hadoop.hbase.ChoreService;
63 import org.apache.hadoop.hbase.ClusterId;
64 import org.apache.hadoop.hbase.ClusterMetrics;
65 import org.apache.hadoop.hbase.ClusterMetrics.Option;
66 import org.apache.hadoop.hbase.ClusterMetricsBuilder;
67 import org.apache.hadoop.hbase.DoNotRetryIOException;
68 import org.apache.hadoop.hbase.HBaseIOException;
69 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
70 import org.apache.hadoop.hbase.HConstants;
71 import org.apache.hadoop.hbase.InvalidFamilyOperationException;
72 import org.apache.hadoop.hbase.MasterNotRunningException;
73 import org.apache.hadoop.hbase.MetaTableAccessor;
74 import org.apache.hadoop.hbase.NamespaceDescriptor;
75 import org.apache.hadoop.hbase.PleaseHoldException;
76 import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
77 import org.apache.hadoop.hbase.ServerName;
78 import org.apache.hadoop.hbase.TableName;
79 import org.apache.hadoop.hbase.TableNotDisabledException;
80 import org.apache.hadoop.hbase.TableNotFoundException;
81 import org.apache.hadoop.hbase.UnknownRegionException;
82 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
83 import org.apache.hadoop.hbase.client.MasterSwitchType;
84 import org.apache.hadoop.hbase.client.RegionInfo;
85 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
86 import org.apache.hadoop.hbase.client.RegionStatesCount;
87 import org.apache.hadoop.hbase.client.TableDescriptor;
88 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
89 import org.apache.hadoop.hbase.client.TableState;
90 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
91 import org.apache.hadoop.hbase.exceptions.DeserializationException;
92 import org.apache.hadoop.hbase.executor.ExecutorType;
93 import org.apache.hadoop.hbase.favored.FavoredNodesManager;
94 import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;
95 import org.apache.hadoop.hbase.http.InfoServer;
96 import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
97 import org.apache.hadoop.hbase.ipc.RpcServer;
98 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
99 import org.apache.hadoop.hbase.log.HBaseMarkers;
100 import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
101 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
102 import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
103 import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
104 import org.apache.hadoop.hbase.master.assignment.RegionStates;
105 import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
106 import org.apache.hadoop.hbase.master.balancer.BalancerChore;
107 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
108 import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
109 import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
110 import org.apache.hadoop.hbase.master.cleaner.DirScanPool;
111 import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
112 import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
113 import org.apache.hadoop.hbase.master.cleaner.ReplicationBarrierCleaner;
114 import org.apache.hadoop.hbase.master.cleaner.SnapshotCleanerChore;
115 import org.apache.hadoop.hbase.master.locking.LockManager;
116 import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
117 import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan.PlanType;
118 import org.apache.hadoop.hbase.master.normalizer.RegionNormalizer;
119 import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerChore;
120 import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerFactory;
121 import org.apache.hadoop.hbase.master.procedure.CreateTableProcedure;
122 import org.apache.hadoop.hbase.master.procedure.DeleteNamespaceProcedure;
123 import org.apache.hadoop.hbase.master.procedure.DeleteTableProcedure;
124 import org.apache.hadoop.hbase.master.procedure.DisableTableProcedure;
125 import org.apache.hadoop.hbase.master.procedure.EnableTableProcedure;
126 import org.apache.hadoop.hbase.master.procedure.InitMetaProcedure;
127 import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
128 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
129 import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler;
130 import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
131 import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.NonceProcedureRunnable;
132 import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
133 import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
134 import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
135 import org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure;
136 import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
137 import org.apache.hadoop.hbase.master.procedure.TruncateTableProcedure;
138 import org.apache.hadoop.hbase.master.replication.AbstractPeerProcedure;
139 import org.apache.hadoop.hbase.master.replication.AddPeerProcedure;
140 import org.apache.hadoop.hbase.master.replication.DisablePeerProcedure;
141 import org.apache.hadoop.hbase.master.replication.EnablePeerProcedure;
142 import org.apache.hadoop.hbase.master.replication.RemovePeerProcedure;
143 import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
144 import org.apache.hadoop.hbase.master.replication.SyncReplicationReplayWALManager;
145 import org.apache.hadoop.hbase.master.replication.TransitPeerSyncReplicationStateProcedure;
146 import org.apache.hadoop.hbase.master.replication.UpdatePeerConfigProcedure;
147 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
148 import org.apache.hadoop.hbase.master.zksyncer.MasterAddressSyncer;
149 import org.apache.hadoop.hbase.master.zksyncer.MetaLocationSyncer;
150 import org.apache.hadoop.hbase.mob.MobConstants;
151 import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
152 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
153 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
154 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
155 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
156 import org.apache.hadoop.hbase.procedure2.LockedResource;
157 import org.apache.hadoop.hbase.procedure2.Procedure;
158 import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
159 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
160 import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure;
161 import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
162 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
163 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureStoreListener;
164 import org.apache.hadoop.hbase.procedure2.store.region.RegionProcedureStore;
165 import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
166 import org.apache.hadoop.hbase.quotas.MasterQuotasObserver;
167 import org.apache.hadoop.hbase.quotas.QuotaObserverChore;
168 import org.apache.hadoop.hbase.quotas.QuotaTableUtil;
169 import org.apache.hadoop.hbase.quotas.QuotaUtil;
170 import org.apache.hadoop.hbase.quotas.SnapshotQuotaObserverChore;
171 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
172 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot.SpaceQuotaStatus;
173 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier;
174 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
175 import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
176 import org.apache.hadoop.hbase.regionserver.HRegionServer;
177 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
178 import org.apache.hadoop.hbase.replication.ReplicationException;
179 import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
180 import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
181 import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
182 import org.apache.hadoop.hbase.replication.ReplicationUtils;
183 import org.apache.hadoop.hbase.replication.SyncReplicationState;
184 import org.apache.hadoop.hbase.replication.master.ReplicationHFileCleaner;
185 import org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner;
186 import org.apache.hadoop.hbase.replication.master.ReplicationPeerConfigUpgrader;
187 import org.apache.hadoop.hbase.replication.regionserver.ReplicationStatus;
188 import org.apache.hadoop.hbase.security.AccessDeniedException;
189 import org.apache.hadoop.hbase.security.SecurityConstants;
190 import org.apache.hadoop.hbase.security.UserProvider;
191 import org.apache.hadoop.hbase.trace.TraceUtil;
192 import org.apache.hadoop.hbase.util.Addressing;
193 import org.apache.hadoop.hbase.util.Bytes;
194 import org.apache.hadoop.hbase.util.FutureUtils;
195 import org.apache.hadoop.hbase.util.HBaseFsck;
196 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
197 import org.apache.hadoop.hbase.util.HasThread;
198 import org.apache.hadoop.hbase.util.IdLock;
199 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
200 import org.apache.hadoop.hbase.util.Pair;
201 import org.apache.hadoop.hbase.util.RetryCounter;
202 import org.apache.hadoop.hbase.util.RetryCounterFactory;
203 import org.apache.hadoop.hbase.util.TableDescriptorChecker;
204 import org.apache.hadoop.hbase.util.Threads;
205 import org.apache.hadoop.hbase.util.VersionInfo;
206 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
207 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
208 import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker;
209 import org.apache.hadoop.hbase.zookeeper.SnapshotCleanupTracker;
210 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
211 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
212 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
213 import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
214 import org.apache.yetus.audience.InterfaceAudience;
215 import org.apache.zookeeper.KeeperException;
216 import org.eclipse.jetty.server.Server;
217 import org.eclipse.jetty.server.ServerConnector;
218 import org.eclipse.jetty.servlet.ServletHolder;
219 import org.eclipse.jetty.webapp.WebAppContext;
220 import org.slf4j.Logger;
221 import org.slf4j.LoggerFactory;
223 import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
224 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
225 import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
227 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
228 import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
229 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
230 import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
233 * HMaster is the "master server" for HBase. An HBase cluster has one active
234 * master. If many masters are started, all compete. Whichever wins goes on to
235 * run the cluster. All others park themselves in their constructor until
236 * master or cluster shutdown or until the active master loses its lease in
237 * zookeeper. Thereafter, all running masters jostle to take over the master role.
239 * <p>The Master can be asked to shut down the cluster. See {@link #shutdown()}. In
240 * this case it will tell all regionservers to go down and then wait on them
241 * all reporting in that they are down. This master will then shut itself down.
243 * <p>You can also shut down just this master. Call {@link #stopMaster()}.
245 * @see org.apache.zookeeper.Watcher
247 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
248 @SuppressWarnings("deprecation")
249 public class HMaster extends HRegionServer implements MasterServices {
250 private static Logger LOG = LoggerFactory.getLogger(HMaster.class);
253 * Protection against zombie master. Started once Master accepts active responsibility and
254 * starts taking over responsibilities. Allows a finite time window before giving up ownership.
256 private static class InitializationMonitor extends HasThread {
257 /** The amount of time in milliseconds to sleep before checking initialization status. */
258 public static final String TIMEOUT_KEY = "hbase.master.initializationmonitor.timeout";
259 public static final long TIMEOUT_DEFAULT = TimeUnit.MILLISECONDS.convert(15, TimeUnit.MINUTES);
262 * When timeout expired and initialization has not complete, call {@link System#exit(int)} when
263 * true, do nothing otherwise.
265 public static final String HALT_KEY = "hbase.master.initializationmonitor.haltontimeout";
266 public static final boolean HALT_DEFAULT = false;
268 private final HMaster master;
269 private final long timeout;
270 private final boolean haltOnTimeout;
272 /** Creates a Thread that monitors the {@link #isInitialized()} state. */
273 InitializationMonitor(HMaster master) {
274 super("MasterInitializationMonitor");
275 this.master = master;
276 this.timeout = master.getConfiguration().getLong(TIMEOUT_KEY, TIMEOUT_DEFAULT);
277 this.haltOnTimeout = master.getConfiguration().getBoolean(HALT_KEY, HALT_DEFAULT);
278 this.setDaemon(true);
281 @Override
282 public void run() {
283 try {
284 while (!master.isStopped() && master.isActiveMaster()) {
285 Thread.sleep(timeout);
286 if (master.isInitialized()) {
287 LOG.debug("Initialization completed within allotted tolerance. Monitor exiting.");
288 } else {
289 LOG.error("Master failed to complete initialization after " + timeout + "ms. Please"
290 + " consider submitting a bug report including a thread dump of this process.");
291 if (haltOnTimeout) {
292 LOG.error("Zombie Master exiting. Thread dump to stdout");
293 Threads.printThreadInfo(System.out, "Zombie HMaster");
294 System.exit(-1);
298 } catch (InterruptedException ie) {
299 LOG.trace("InitMonitor thread interrupted. Existing.");
304 // MASTER is the name of the webapp and the attribute name used when stuffing this
305 // instance into the web context.
306 public static final String MASTER = "master";
308 // Manager and zk listener for master election
309 private final ActiveMasterManager activeMasterManager;
310 // Region server tracker
311 private RegionServerTracker regionServerTracker;
312 // Draining region server tracker
313 private DrainingServerTracker drainingServerTracker;
314 // Tracker for load balancer state
315 LoadBalancerTracker loadBalancerTracker;
316 // Tracker for meta location, if any client ZK quorum specified
317 MetaLocationSyncer metaLocationSyncer;
318 // Tracker for active master location, if any client ZK quorum specified
319 MasterAddressSyncer masterAddressSyncer;
320 // Tracker for auto snapshot cleanup state
321 SnapshotCleanupTracker snapshotCleanupTracker;
323 // Tracker for split and merge state
324 private SplitOrMergeTracker splitOrMergeTracker;
326 // Tracker for region normalizer state
327 private RegionNormalizerTracker regionNormalizerTracker;
329 private ClusterSchemaService clusterSchemaService;
331 public static final String HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS =
332 "hbase.master.wait.on.service.seconds";
333 public static final int DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS = 5 * 60;
335 public static final String HBASE_MASTER_CLEANER_INTERVAL = "hbase.master.cleaner.interval";
337 public static final int DEFAULT_HBASE_MASTER_CLEANER_INTERVAL = 600 * 1000;
339 // Metrics for the HMaster
340 final MetricsMaster metricsMaster;
341 // file system manager for the master FS operations
342 private MasterFileSystem fileSystemManager;
343 private MasterWalManager walManager;
345 // manager to manage procedure-based WAL splitting, can be null if current
346 // is zk-based WAL splitting. SplitWALManager will replace SplitLogManager
347 // and MasterWalManager, which means zk-based WAL splitting code will be
348 // useless after we switch to the procedure-based one. our eventual goal
349 // is to remove all the zk-based WAL splitting code.
350 private SplitWALManager splitWALManager;
352 // server manager to deal with region server info
353 private volatile ServerManager serverManager;
355 // manager of assignment nodes in zookeeper
356 private AssignmentManager assignmentManager;
358 // manager of replication
359 private ReplicationPeerManager replicationPeerManager;
361 private SyncReplicationReplayWALManager syncReplicationReplayWALManager;
363 // buffer for "fatal error" notices from region servers
364 // in the cluster. This is only used for assisting
365 // operations/debugging.
366 MemoryBoundedLogMessageBuffer rsFatals;
368 // flag set after we become the active master (used for testing)
369 private volatile boolean activeMaster = false;
371 // flag set after we complete initialization once active
372 private final ProcedureEvent<?> initialized = new ProcedureEvent<>("master initialized");
374 // flag set after master services are started,
375 // initialization may have not completed yet.
376 volatile boolean serviceStarted = false;
378 // Maximum time we should run balancer for
379 private final int maxBlancingTime;
380 // Maximum percent of regions in transition when balancing
381 private final double maxRitPercent;
383 private final LockManager lockManager = new LockManager(this);
385 private LoadBalancer balancer;
386 private RegionNormalizer normalizer;
387 private BalancerChore balancerChore;
388 private RegionNormalizerChore normalizerChore;
389 private ClusterStatusChore clusterStatusChore;
390 private ClusterStatusPublisher clusterStatusPublisherChore = null;
391 private SnapshotCleanerChore snapshotCleanerChore = null;
393 private HbckChore hbckChore;
394 CatalogJanitor catalogJanitorChore;
395 private DirScanPool cleanerPool;
396 private LogCleaner logCleaner;
397 private HFileCleaner hfileCleaner;
398 private ReplicationBarrierCleaner replicationBarrierCleaner;
399 private ExpiredMobFileCleanerChore expiredMobFileCleanerChore;
400 private MobCompactionChore mobCompactChore;
401 private MasterMobCompactionThread mobCompactThread;
402 // used to synchronize the mobCompactionStates
403 private final IdLock mobCompactionLock = new IdLock();
404 // save the information of mob compactions in tables.
405 // the key is table name, the value is the number of compactions in that table.
406 private Map<TableName, AtomicInteger> mobCompactionStates = Maps.newConcurrentMap();
408 MasterCoprocessorHost cpHost;
410 private final boolean preLoadTableDescriptors;
412 // Timestamp of when this HMaster became active
413 private long masterActiveTime;
415 // Time stamp for when HMaster finishes becoming Active Master
416 private long masterFinishedInitializationTime;
418 Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
420 // monitor for snapshot of hbase tables
421 SnapshotManager snapshotManager;
422 // monitor for distributed procedures
423 private MasterProcedureManagerHost mpmHost;
425 private RegionsRecoveryChore regionsRecoveryChore = null;
427 private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null;
428 // it is assigned after 'initialized' guard set to true, so should be volatile
429 private volatile MasterQuotaManager quotaManager;
430 private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
431 private QuotaObserverChore quotaObserverChore;
432 private SnapshotQuotaObserverChore snapshotQuotaChore;
434 private ProcedureExecutor<MasterProcedureEnv> procedureExecutor;
435 private ProcedureStore procedureStore;
437 // handle table states
438 private TableStateManager tableStateManager;
440 private long splitPlanCount;
441 private long mergePlanCount;
443 /* Handle favored nodes information */
444 private FavoredNodesManager favoredNodesManager;
446 /** jetty server for master to redirect requests to regionserver infoServer */
447 private Server masterJettyServer;
449 // Determine if we should do normal startup or minimal "single-user" mode with no region
450 // servers and no user tables. Useful for repair and recovery of hbase:meta
451 private final boolean maintenanceMode;
452 static final String MAINTENANCE_MODE = "hbase.master.maintenance_mode";
454 // Cached clusterId on stand by masters to serve clusterID requests from clients.
455 private final CachedClusterId cachedClusterId;
457 public static class RedirectServlet extends HttpServlet {
458 private static final long serialVersionUID = 2894774810058302473L;
459 private final int regionServerInfoPort;
460 private final String regionServerHostname;
463 * @param infoServer that we're trying to send all requests to
464 * @param hostname may be null. if given, will be used for redirects instead of host from client.
466 public RedirectServlet(InfoServer infoServer, String hostname) {
467 regionServerInfoPort = infoServer.getPort();
468 regionServerHostname = hostname;
471 @Override
472 public void doGet(HttpServletRequest request,
473 HttpServletResponse response) throws ServletException, IOException {
474 String redirectHost = regionServerHostname;
475 if(redirectHost == null) {
476 redirectHost = request.getServerName();
477 if(!Addressing.isLocalAddress(InetAddress.getByName(redirectHost))) {
478 LOG.warn("Couldn't resolve '" + redirectHost + "' as an address local to this node and '" +
479 MASTER_HOSTNAME_KEY + "' is not set; client will get an HTTP 400 response. If " +
480 "your HBase deployment relies on client accessible names that the region server process " +
481 "can't resolve locally, then you should set the previously mentioned configuration variable " +
482 "to an appropriate hostname.");
483 // no sending client provided input back to the client, so the goal host is just in the logs.
484 response.sendError(400, "Request was to a host that I can't resolve for any of the network interfaces on " +
485 "this node. If this is due to an intermediary such as an HTTP load balancer or other proxy, your HBase " +
486 "administrator can set '" + MASTER_HOSTNAME_KEY + "' to point to the correct hostname.");
487 return;
490 // TODO this scheme should come from looking at the scheme registered in the infoserver's http server for the
491 // host and port we're using, but it's buried way too deep to do that ATM.
492 String redirectUrl = request.getScheme() + "://"
493 + redirectHost + ":" + regionServerInfoPort
494 + request.getRequestURI();
495 response.sendRedirect(redirectUrl);
500 * Initializes the HMaster. The steps are as follows:
501 * <p>
502 * <ol>
503 * <li>Initialize the local HRegionServer
504 * <li>Start the ActiveMasterManager.
505 * </ol>
506 * <p>
507 * Remaining steps of initialization occur in
508 * {@link #finishActiveMasterInitialization(MonitoredTask)} after the master becomes the
509 * active one.
511 public HMaster(final Configuration conf)
512 throws IOException, KeeperException {
513 super(conf);
514 TraceUtil.initTracer(conf);
515 try {
516 if (conf.getBoolean(MAINTENANCE_MODE, false)) {
517 LOG.info("Detected {}=true via configuration.", MAINTENANCE_MODE);
518 maintenanceMode = true;
519 } else if (Boolean.getBoolean(MAINTENANCE_MODE)) {
520 LOG.info("Detected {}=true via environment variables.", MAINTENANCE_MODE);
521 maintenanceMode = true;
522 } else {
523 maintenanceMode = false;
526 this.rsFatals = new MemoryBoundedLogMessageBuffer(
527 conf.getLong("hbase.master.buffer.for.rs.fatals", 1 * 1024 * 1024));
528 LOG.info("hbase.rootdir={}, hbase.cluster.distributed={}", getDataRootDir(),
529 this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false));
531 // Disable usage of meta replicas in the master
532 this.conf.setBoolean(HConstants.USE_META_REPLICAS, false);
534 decorateMasterConfiguration(this.conf);
536 // Hack! Maps DFSClient => Master for logs. HDFS made this
537 // config param for task trackers, but we can piggyback off of it.
538 if (this.conf.get("mapreduce.task.attempt.id") == null) {
539 this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
542 this.metricsMaster = new MetricsMaster(new MetricsMasterWrapperImpl(this));
544 // preload table descriptor at startup
545 this.preLoadTableDescriptors = conf.getBoolean("hbase.master.preload.tabledescriptors", true);
547 this.maxBlancingTime = getMaxBalancingTime();
548 this.maxRitPercent = conf.getDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT,
549 HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT);
551 // Do we publish the status?
553 boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
554 HConstants.STATUS_PUBLISHED_DEFAULT);
555 Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
556 conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
557 ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
558 ClusterStatusPublisher.Publisher.class);
560 if (shouldPublish) {
561 if (publisherClass == null) {
562 LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
563 ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS +
564 " is not set - not publishing status");
565 } else {
566 clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
567 getChoreService().scheduleChore(clusterStatusPublisherChore);
571 // Some unit tests don't need a cluster, so no zookeeper at all
572 if (!conf.getBoolean("hbase.testing.nocluster", false)) {
573 this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this);
574 } else {
575 this.activeMasterManager = null;
577 cachedClusterId = new CachedClusterId(conf);
578 } catch (Throwable t) {
579 // Make sure we log the exception. HMaster is often started via reflection and the
580 // cause of failed startup is lost.
581 LOG.error("Failed construction of Master", t);
582 throw t;
586 @Override
587 protected String getUseThisHostnameInstead(Configuration conf) {
588 return conf.get(MASTER_HOSTNAME_KEY);
591 // Main run loop. Calls through to the regionserver run loop AFTER becoming active Master; will
592 // block in here until then.
593 @Override
594 public void run() {
595 try {
596 if (!conf.getBoolean("hbase.testing.nocluster", false)) {
597 Threads.setDaemonThreadRunning(new Thread(() -> {
598 try {
599 int infoPort = putUpJettyServer();
600 startActiveMasterManager(infoPort);
601 } catch (Throwable t) {
602 // Make sure we log the exception.
603 String error = "Failed to become Active Master";
604 LOG.error(error, t);
605 // Abort should have been called already.
606 if (!isAborted()) {
607 abort(error, t);
610 }), getName() + ":becomeActiveMaster");
612 // Fall in here even if we have been aborted. Need to run the shutdown services and
613 // the super run call will do this for us.
614 super.run();
615 } finally {
616 if (this.clusterSchemaService != null) {
617 // If on way out, then we are no longer active master.
618 this.clusterSchemaService.stopAsync();
619 try {
620 this.clusterSchemaService.awaitTerminated(
621 getConfiguration().getInt(HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS,
622 DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS);
623 } catch (TimeoutException te) {
624 LOG.warn("Failed shutdown of clusterSchemaService", te);
627 this.activeMaster = false;
631 // return the actual infoPort, -1 means disable info server.
632 private int putUpJettyServer() throws IOException {
633 if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
634 return -1;
636 final int infoPort = conf.getInt("hbase.master.info.port.orig",
637 HConstants.DEFAULT_MASTER_INFOPORT);
638 // -1 is for disabling info server, so no redirecting
639 if (infoPort < 0 || infoServer == null) {
640 return -1;
642 if(infoPort == infoServer.getPort()) {
643 return infoPort;
645 final String addr = conf.get("hbase.master.info.bindAddress", "0.0.0.0");
646 if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
647 String msg =
648 "Failed to start redirecting jetty server. Address " + addr
649 + " does not belong to this host. Correct configuration parameter: "
650 + "hbase.master.info.bindAddress";
651 LOG.error(msg);
652 throw new IOException(msg);
655 // TODO I'm pretty sure we could just add another binding to the InfoServer run by
656 // the RegionServer and have it run the RedirectServlet instead of standing up
657 // a second entire stack here.
658 masterJettyServer = new Server();
659 final ServerConnector connector = new ServerConnector(masterJettyServer);
660 connector.setHost(addr);
661 connector.setPort(infoPort);
662 masterJettyServer.addConnector(connector);
663 masterJettyServer.setStopAtShutdown(true);
665 final String redirectHostname =
666 StringUtils.isBlank(useThisHostnameInstead) ? null : useThisHostnameInstead;
668 final RedirectServlet redirect = new RedirectServlet(infoServer, redirectHostname);
669 final WebAppContext context = new WebAppContext(null, "/", null, null, null, null, WebAppContext.NO_SESSIONS);
670 context.addServlet(new ServletHolder(redirect), "/*");
671 context.setServer(masterJettyServer);
673 try {
674 masterJettyServer.start();
675 } catch (Exception e) {
676 throw new IOException("Failed to start redirecting jetty server", e);
678 return connector.getLocalPort();
681 @Override
682 protected Function<TableDescriptorBuilder, TableDescriptorBuilder> getMetaTableObserver() {
683 return builder -> builder.setRegionReplication(conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM));
686 * For compatibility, if failed with regionserver credentials, try the master one
688 @Override
689 protected void login(UserProvider user, String host) throws IOException {
690 try {
691 super.login(user, host);
692 } catch (IOException ie) {
693 user.login(SecurityConstants.MASTER_KRB_KEYTAB_FILE,
694 SecurityConstants.MASTER_KRB_PRINCIPAL, host);
699 * If configured to put regions on active master,
700 * wait till a backup master becomes active.
701 * Otherwise, loop till the server is stopped or aborted.
703 @Override
704 protected void waitForMasterActive(){
705 if (maintenanceMode) {
706 return;
708 boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(conf);
709 while (!(tablesOnMaster && activeMaster) && !isStopped() && !isAborted()) {
710 sleeper.sleep();
714 @VisibleForTesting
715 public MasterRpcServices getMasterRpcServices() {
716 return (MasterRpcServices)rpcServices;
719 public boolean balanceSwitch(final boolean b) throws IOException {
720 return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
723 @Override
724 protected String getProcessName() {
725 return MASTER;
728 @Override
729 protected boolean canCreateBaseZNode() {
730 return true;
733 @Override
734 protected boolean canUpdateTableDescriptor() {
735 return true;
738 @Override
739 protected RSRpcServices createRpcServices() throws IOException {
740 return new MasterRpcServices(this);
743 @Override
744 protected void configureInfoServer() {
745 infoServer.addUnprivilegedServlet("master-status", "/master-status", MasterStatusServlet.class);
746 infoServer.setAttribute(MASTER, this);
747 if (LoadBalancer.isTablesOnMaster(conf)) {
748 super.configureInfoServer();
752 @Override
753 protected Class<? extends HttpServlet> getDumpServlet() {
754 return MasterDumpServlet.class;
757 @Override
758 public MetricsMaster getMasterMetrics() {
759 return metricsMaster;
763 * <p>
764 * Initialize all ZK based system trackers. But do not include {@link RegionServerTracker}, it
765 * should have already been initialized along with {@link ServerManager}.
766 * </p>
767 * <p>
768 * Will be overridden in tests.
769 * </p>
771 @VisibleForTesting
772 protected void initializeZKBasedSystemTrackers()
773 throws IOException, InterruptedException, KeeperException, ReplicationException {
774 this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
775 this.normalizer = RegionNormalizerFactory.getRegionNormalizer(conf);
776 this.normalizer.setMasterServices(this);
777 this.normalizer.setMasterRpcServices((MasterRpcServices)rpcServices);
778 this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
779 this.loadBalancerTracker.start();
781 this.regionNormalizerTracker = new RegionNormalizerTracker(zooKeeper, this);
782 this.regionNormalizerTracker.start();
784 this.splitOrMergeTracker = new SplitOrMergeTracker(zooKeeper, conf, this);
785 this.splitOrMergeTracker.start();
787 this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf);
789 this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager);
790 this.drainingServerTracker.start();
792 this.snapshotCleanupTracker = new SnapshotCleanupTracker(zooKeeper, this);
793 this.snapshotCleanupTracker.start();
795 String clientQuorumServers = conf.get(HConstants.CLIENT_ZOOKEEPER_QUORUM);
796 boolean clientZkObserverMode = conf.getBoolean(HConstants.CLIENT_ZOOKEEPER_OBSERVER_MODE,
797 HConstants.DEFAULT_CLIENT_ZOOKEEPER_OBSERVER_MODE);
798 if (clientQuorumServers != null && !clientZkObserverMode) {
799 // we need to take care of the ZK information synchronization
800 // if given client ZK are not observer nodes
801 ZKWatcher clientZkWatcher = new ZKWatcher(conf,
802 getProcessName() + ":" + rpcServices.getSocketAddress().getPort() + "-clientZK", this,
803 false, true);
804 this.metaLocationSyncer = new MetaLocationSyncer(zooKeeper, clientZkWatcher, this);
805 this.metaLocationSyncer.start();
806 this.masterAddressSyncer = new MasterAddressSyncer(zooKeeper, clientZkWatcher, this);
807 this.masterAddressSyncer.start();
808 // set cluster id is a one-go effort
809 ZKClusterId.setClusterId(clientZkWatcher, fileSystemManager.getClusterId());
812 // Set the cluster as up. If new RSs, they'll be waiting on this before
813 // going ahead with their startup.
814 boolean wasUp = this.clusterStatusTracker.isClusterUp();
815 if (!wasUp) this.clusterStatusTracker.setClusterUp();
817 LOG.info("Active/primary master=" + this.serverName +
818 ", sessionid=0x" +
819 Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
820 ", setting cluster-up flag (Was=" + wasUp + ")");
822 // create/initialize the snapshot manager and other procedure managers
823 this.snapshotManager = new SnapshotManager();
824 this.mpmHost = new MasterProcedureManagerHost();
825 this.mpmHost.register(this.snapshotManager);
826 this.mpmHost.register(new MasterFlushTableProcedureManager());
827 this.mpmHost.loadProcedures(conf);
828 this.mpmHost.initialize(this, this.metricsMaster);
831 // Will be overriden in test to inject customized AssignmentManager
832 @VisibleForTesting
833 protected AssignmentManager createAssignmentManager(MasterServices master) {
834 return new AssignmentManager(master);
838 * Finish initialization of HMaster after becoming the primary master.
839 * <p/>
840 * The startup order is a bit complicated but very important, do not change it unless you know
841 * what you are doing.
842 * <ol>
843 * <li>Initialize file system based components - file system manager, wal manager, table
844 * descriptors, etc</li>
845 * <li>Publish cluster id</li>
846 * <li>Here comes the most complicated part - initialize server manager, assignment manager and
847 * region server tracker
848 * <ol type='i'>
849 * <li>Create server manager</li>
850 * <li>Create procedure executor, load the procedures, but do not start workers. We will start it
851 * later after we finish scheduling SCPs to avoid scheduling duplicated SCPs for the same
852 * server</li>
853 * <li>Create assignment manager and start it, load the meta region state, but do not load data
854 * from meta region</li>
855 * <li>Start region server tracker, construct the online servers set and find out dead servers and
856 * schedule SCP for them. The online servers will be constructed by scanning zk, and we will also
857 * scan the wal directory to find out possible live region servers, and the differences between
858 * these two sets are the dead servers</li>
859 * </ol>
860 * </li>
861 * <li>If this is a new deploy, schedule a InitMetaProcedure to initialize meta</li>
862 * <li>Start necessary service threads - balancer, catalog janitor, executor services, and also the
863 * procedure executor, etc. Notice that the balancer must be created first as assignment manager
864 * may use it when assigning regions.</li>
865 * <li>Wait for meta to be initialized if necessary, start table state manager.</li>
866 * <li>Wait for enough region servers to check-in</li>
867 * <li>Let assignment manager load data from meta and construct region states</li>
868 * <li>Start all other things such as chore services, etc</li>
869 * </ol>
870 * <p/>
871 * Notice that now we will not schedule a special procedure to make meta online(unless the first
872 * time where meta has not been created yet), we will rely on SCP to bring meta online.
// Runs the active-master startup sequence in a fixed order: file system and WAL managers,
// cluster id, server/procedure/assignment managers, ZK trackers, coprocessors, meta
// initialization, service threads, region server check-in, chores and finally the
// post-start hooks. Progress is reported through the given MonitoredTask. The method returns
// early (without completing) if the master is stopped or a wait for meta/namespace fails.
874 private void finishActiveMasterInitialization(MonitoredTask status) throws IOException,
875 InterruptedException, KeeperException, ReplicationException {
877 * We are active master now... go initialize components we need to run.
879 status.setStatus("Initializing Master file system");
// Start of initialization; used further down to log how long initialization took.
881 this.masterActiveTime = System.currentTimeMillis();
882 // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
884 // always initialize the MemStoreLAB as we use a region to store procedure now.
885 initializeMemStoreChunkCreator();
886 this.fileSystemManager = new MasterFileSystem(conf);
887 this.walManager = new MasterWalManager(this);
889 // enable table descriptors cache
890 this.tableDescriptors.setCacheOn();
892 // warm-up HTDs cache on master initialization
893 if (preLoadTableDescriptors) {
894 status.setStatus("Pre-loading table descriptors");
895 this.tableDescriptors.getAll();
898 // Publish cluster ID; set it in Master too. The superclass RegionServer does this later but
899 // only after it has checked in with the Master. At least a few tests ask Master for clusterId
900 // before it has called its run method and before RegionServer has done the reportForDuty.
901 ClusterId clusterId = fileSystemManager.getClusterId();
902 status.setStatus("Publishing Cluster ID " + clusterId + " in ZooKeeper");
903 ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
904 this.clusterId = clusterId.toString();
906 // Precaution. Put in place the old hbck1 lock file to fence out old hbase1s running their
907 // hbck1s against an hbase2 cluster; it could do damage. To skip this behavior, set
908 // hbase.write.hbck1.lock.file to false.
909 if (this.conf.getBoolean("hbase.write.hbck1.lock.file", true)) {
910 HBaseFsck.checkAndMarkRunningHbck(this.conf,
911 HBaseFsck.createLockRetryCounterFactory(this.conf).create());
914 status.setStatus("Initialize ServerManager and schedule SCP for crash servers");
915 // The below two managers must be created before loading procedures, as they will be used during
916 // loading.
917 this.serverManager = createServerManager(this);
918 this.syncReplicationReplayWALManager = new SyncReplicationReplayWALManager(this);
// The SplitWALManager is only created when WAL splitting is NOT coordinated by ZK.
919 if (!conf.getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
920 DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)) {
921 this.splitWALManager = new SplitWALManager(this);
923 createProcedureExecutor();
// Group the executor's active procedures by concrete class so the TRSP and SCP lists used
// below can be looked up directly.
924 Map<Class<?>, List<Procedure<MasterProcedureEnv>>> procsByType =
925 procedureExecutor.getActiveProceduresNoCopy().stream()
926 .collect(Collectors.groupingBy(p -> p.getClass()));
928 // Create Assignment Manager
929 this.assignmentManager = createAssignmentManager(this);
930 this.assignmentManager.start();
931 // TODO: TRSP can perform as the sub procedure for other procedures, so even if it is marked as
932 // completed, it could still be in the procedure list. This is a bit strange but is another
933 // story, need to verify the implementation for ProcedureExecutor and ProcedureStore.
934 List<TransitRegionStateProcedure> ritList =
935 procsByType.getOrDefault(TransitRegionStateProcedure.class, Collections.emptyList()).stream()
936 .filter(p -> !p.isFinished()).map(p -> (TransitRegionStateProcedure) p)
937 .collect(Collectors.toList());
938 this.assignmentManager.setupRIT(ritList);
940 // Start RegionServerTracker with listing of servers found with exiting SCPs -- these should
941 // be registered in the deadServers set -- and with the list of servernames out on the
942 // filesystem that COULD BE 'alive' (we'll schedule SCPs for each and let SCP figure it out).
943 // We also pass dirs that are already 'splitting'... so we can do some checks down in tracker.
944 // TODO: Generate the splitting and live Set in one pass instead of two as we currently do.
945 this.regionServerTracker = new RegionServerTracker(zooKeeper, this, this.serverManager);
946 this.regionServerTracker.start(
947 procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream()
948 .map(p -> (ServerCrashProcedure) p).map(p -> p.getServerName()).collect(Collectors.toSet()),
949 walManager.getLiveServersFromWALDir(), walManager.getSplittingServersFromWALDir());
950 // This manager will be started AFTER hbase:meta is confirmed on line.
951 // hbase.mirror.table.state.to.zookeeper is so hbase1 clients can connect. They read table
952 // state from zookeeper while hbase2 reads it from hbase:meta. Disable if no hbase1 clients.
953 this.tableStateManager =
954 this.conf.getBoolean(MirroringTableStateManager.MIRROR_TABLE_STATE_TO_ZK_KEY, true)
956 new MirroringTableStateManager(this):
957 new TableStateManager(this);
959 status.setStatus("Initializing ZK system trackers");
960 initializeZKBasedSystemTrackers();
961 status.setStatus("Loading last flushed sequence id of regions");
// A failure to load the sequence ids is only logged at INFO; initialization carries on.
962 try {
963 this.serverManager.loadLastFlushedSequenceIds();
964 } catch (IOException e) {
965 LOG.info("Failed to load last flushed sequence id of regions"
966 + " from file system", e);
968 // Set ourselves as active Master now our claim has succeeded up in zk.
969 this.activeMaster = true;
971 // Start the Zombie master detector after setting master as active, see HBASE-21535
972 Thread zombieDetector = new Thread(new InitializationMonitor(this),
973 "ActiveMasterInitializationMonitor-" + System.currentTimeMillis());
974 zombieDetector.setDaemon(true);
975 zombieDetector.start();
977 // This is for backwards compatibility
978 // See HBASE-11393
979 status.setStatus("Update TableCFs node in ZNode");
980 ReplicationPeerConfigUpgrader tableCFsUpdater =
981 new ReplicationPeerConfigUpgrader(zooKeeper, conf);
982 tableCFsUpdater.copyTableCFs();
// Coprocessors are not set up in maintenance mode, so cpHost may still be null in the
// hook calls further down (both are null-guarded).
984 if (!maintenanceMode) {
985 // Add the Observer to delete quotas on table deletion before starting all CPs by
986 // default with quota support, avoiding if user specifically asks to not load this Observer.
987 if (QuotaUtil.isQuotaEnabled(conf)) {
988 updateConfigurationForQuotasObserver(conf);
990 // initialize master side coprocessors before we start handling requests
991 status.setStatus("Initializing master coprocessors");
992 this.cpHost = new MasterCoprocessorHost(this, this.conf);
995 // Checking if meta needs initializing.
996 status.setStatus("Initializing meta table if this is a new deploy");
997 InitMetaProcedure initMetaProc = null;
998 // Print out state of hbase:meta on startup; helps debugging.
999 RegionState rs = this.assignmentManager.getRegionStates().
1000 getRegionState(RegionInfoBuilder.FIRST_META_REGIONINFO);
1001 LOG.info("hbase:meta {}", rs);
// Reuse an InitMetaProcedure already known to the executor if there is one; otherwise
// submit a new one. It is awaited after the service threads have been started.
1002 if (rs != null && rs.isOffline()) {
1003 Optional<InitMetaProcedure> optProc = procedureExecutor.getProcedures().stream()
1004 .filter(p -> p instanceof InitMetaProcedure).map(o -> (InitMetaProcedure) o).findAny();
1005 initMetaProc = optProc.orElseGet(() -> {
1006 // schedule an init meta procedure if meta has not been deployed yet
1007 InitMetaProcedure temp = new InitMetaProcedure();
1008 procedureExecutor.submitProcedure(temp);
1009 return temp;
1012 if (this.balancer instanceof FavoredNodesPromoter) {
1013 favoredNodesManager = new FavoredNodesManager(this);
1016 // initialize load balancer
1017 this.balancer.setMasterServices(this);
1018 this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());
1019 this.balancer.initialize();
1021 // start up all service threads.
1022 status.setStatus("Initializing master service threads");
1023 startServiceThreads();
1024 // wait meta to be initialized after we start procedure executor
1025 if (initMetaProc != null) {
1026 initMetaProc.await();
1028 // Wake up this server to check in
1029 sleeper.skipSleepCycle();
1031 // Wait for region servers to report in.
1032 // With this as part of master initialization, it precludes our being able to start a single
1033 // server that is both Master and RegionServer. Needs more thought. TODO.
1034 String statusStr = "Wait for region servers to report in";
1035 status.setStatus(statusStr);
1036 LOG.info(Objects.toString(status));
1037 waitForRegionServers(status);
1039 // Check if master is shutting down because issue initializing regionservers or balancer.
1040 if (isStopped()) {
1041 return;
1044 status.setStatus("Starting assignment manager");
1045 // FIRST HBASE:META READ!!!!
1046 // The below cannot make progress w/o hbase:meta being online.
1047 // This is the FIRST attempt at going to hbase:meta. Meta on-lining is going on in background
1048 // as procedures run -- in particular SCPs for crashed servers... One should put up hbase:meta
1049 // if it is down. It may take a while to come online. So, wait here until meta if for sure
1050 // available. That's what waitForMetaOnline does.
1051 if (!waitForMetaOnline()) {
1052 return;
1054 this.assignmentManager.joinCluster();
1055 // The below depends on hbase:meta being online.
1056 this.tableStateManager.start();
1057 // Below has to happen after tablestatemanager has started in the case where this hbase-2.x
1058 // is being started over an hbase-1.x dataset. tablestatemanager runs a migration as part
1059 // of its 'start' moving table state from zookeeper to hbase:meta. This migration needs to
1060 // complete before we do this next step processing offline regions else it fails reading
1061 // table states messing up master launch (namespace table, etc., are not assigned).
1062 this.assignmentManager.processOfflineRegions();
1063 // Initialize after meta is up as below scans meta
1064 if (favoredNodesManager != null && !maintenanceMode) {
1065 SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment =
1066 new SnapshotOfRegionAssignmentFromMeta(getConnection());
1067 snapshotOfRegionAssignment.initialize();
1068 favoredNodesManager.initialize(snapshotOfRegionAssignment);
1071 // set cluster status again after user regions are assigned
1072 this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());
1074 // Start balancer and meta catalog janitor after meta and regions have been assigned.
1075 status.setStatus("Starting balancer and catalog janitor");
1076 this.clusterStatusChore = new ClusterStatusChore(this, balancer);
1077 getChoreService().scheduleChore(clusterStatusChore);
1078 this.balancerChore = new BalancerChore(this);
1079 getChoreService().scheduleChore(balancerChore);
1080 this.normalizerChore = new RegionNormalizerChore(this);
1081 getChoreService().scheduleChore(normalizerChore);
1082 this.catalogJanitorChore = new CatalogJanitor(this);
1083 getChoreService().scheduleChore(catalogJanitorChore);
1084 this.hbckChore = new HbckChore(this);
1085 getChoreService().scheduleChore(hbckChore);
1086 this.serverManager.startChore();
1088 // Only for rolling upgrade, where we need to migrate the data in namespace table to meta table.
1089 if (!waitForNamespaceOnline()) {
1090 return;
1092 status.setStatus("Starting cluster schema service");
1093 initClusterSchemaService();
// A failing preMasterInitialization hook is logged and does not abort initialization.
1095 if (this.cpHost != null) {
1096 try {
1097 this.cpHost.preMasterInitialization();
1098 } catch (IOException e) {
1099 LOG.error("Coprocessor preMasterInitialization() hook failed", e);
1103 status.markComplete("Initialization successful");
1104 LOG.info(String.format("Master has completed initialization %.3fsec",
1105 (System.currentTimeMillis() - masterActiveTime) / 1000.0f));
1106 this.masterFinishedInitializationTime = System.currentTimeMillis();
1107 configurationManager.registerObserver(this.balancer);
1108 configurationManager.registerObserver(this.cleanerPool);
1109 configurationManager.registerObserver(this.hfileCleaner);
1110 configurationManager.registerObserver(this.logCleaner);
1111 configurationManager.registerObserver(this.regionsRecoveryConfigManager);
1112 // Set master as 'initialized'.
1113 setInitialized(true);
// Everything below this point is skipped in maintenance (repair) mode.
1115 if (maintenanceMode) {
1116 LOG.info("Detected repair mode, skipping final initialization steps.");
1117 return;
1120 assignmentManager.checkIfShouldMoveSystemRegionAsync();
1121 status.setStatus("Assign meta replicas");
1122 MasterMetaBootstrap metaBootstrap = createMetaBootstrap();
1123 metaBootstrap.assignMetaReplicas();
1124 status.setStatus("Starting quota manager");
1125 initQuotaManager();
1126 if (QuotaUtil.isQuotaEnabled(conf)) {
1127 // Create the quota snapshot notifier
1128 spaceQuotaSnapshotNotifier = createQuotaSnapshotNotifier();
1129 spaceQuotaSnapshotNotifier.initialize(getConnection());
1130 this.quotaObserverChore = new QuotaObserverChore(this, getMasterMetrics());
1131 // Start the chore to read the region FS space reports and act on them
1132 getChoreService().scheduleChore(quotaObserverChore);
1134 this.snapshotQuotaChore = new SnapshotQuotaObserverChore(this, getMasterMetrics());
1135 // Start the chore to read snapshots and add their usage to table/NS quotas
1136 getChoreService().scheduleChore(snapshotQuotaChore);
1139 // clear the dead servers with same host name and port of online server because we are not
1140 // removing dead server with same hostname and port of rs which is trying to check in before
1141 // master initialization. See HBASE-5916.
1142 this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();
1144 // Check and set the znode ACLs if needed in case we are overtaking a non-secure configuration
1145 status.setStatus("Checking ZNode ACLs");
1146 zooKeeper.checkAndSetZNodeAcls();
1148 status.setStatus("Initializing MOB Cleaner");
1149 initMobCleaner();
1151 status.setStatus("Calling postStartMaster coprocessors");
1152 if (this.cpHost != null) {
1153 // don't let cp initialization errors kill the master
1154 try {
1155 this.cpHost.postStartMaster();
1156 } catch (IOException ioe) {
1157 LOG.error("Coprocessor postStartMaster() hook failed", ioe);
// Initialization has reached its end; interrupt the monitor thread started earlier in
// this method.
1161 zombieDetector.interrupt();
1164 * After master has started up, lets do balancer post startup initialization. Since this runs
1165 * in activeMasterManager thread, it should be fine.
1167 long start = System.currentTimeMillis();
1168 this.balancer.postMasterStartupInitialize();
1169 if (LOG.isDebugEnabled()) {
1170 LOG.debug("Balancer post startup initialization complete, took " + (
1171 (System.currentTimeMillis() - start) / 1000) + " seconds");
1176 * Check hbase:meta is up and ready for reading. For use during Master startup only.
1177 * @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
1178 * and we will hold here until operator intervention.
1180 @VisibleForTesting
1181 public boolean waitForMetaOnline() throws InterruptedException {
1182 return isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO);
1186 * @return True if region is online and scannable else false if an error or shutdown (Otherwise
1187 * we just block in here holding up all forward-progess).
1189 private boolean isRegionOnline(RegionInfo ri) throws InterruptedException {
1190 RetryCounter rc = null;
1191 while (!isStopped()) {
1192 RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri);
1193 if (rs.isOpened()) {
1194 if (this.getServerManager().isServerOnline(rs.getServerName())) {
1195 return true;
1198 // Region is not OPEN.
1199 Optional<Procedure<MasterProcedureEnv>> optProc = this.procedureExecutor.getProcedures().
1200 stream().filter(p -> p instanceof ServerCrashProcedure).findAny();
1201 // TODO: Add a page to refguide on how to do repair. Have this log message point to it.
1202 // Page will talk about loss of edits, how to schedule at least the meta WAL recovery, and
1203 // then how to assign including how to break region lock if one held.
1204 LOG.warn("{} is NOT online; state={}; ServerCrashProcedures={}. Master startup cannot " +
1205 "progress, in holding-pattern until region onlined.",
1206 ri.getRegionNameAsString(), rs, optProc.isPresent());
1207 // Check once-a-minute.
1208 if (rc == null) {
1209 rc = new RetryCounterFactory(1000).create();
1211 Threads.sleep(rc.getBackoffTimeAndIncrementAttempts());
1213 return false;
1217 * Check hbase:namespace table is assigned. If not, startup will hang looking for the ns table
1218 * <p/>
1219 * This is for rolling upgrading, later we will migrate the data in ns table to the ns family of
1220 * meta table. And if this is a new clsuter, this method will return immediately as there will be
1221 * no namespace table/region.
1222 * @return True if namespace table is up/online.
1224 private boolean waitForNamespaceOnline() throws InterruptedException, IOException {
1225 TableState nsTableState =
1226 MetaTableAccessor.getTableState(getConnection(), TableName.NAMESPACE_TABLE_NAME);
1227 if (nsTableState == null || nsTableState.isDisabled()) {
1228 // this means we have already migrated the data and disabled or deleted the namespace table,
1229 // or this is a new depliy which does not have a namespace table from the beginning.
1230 return true;
1232 List<RegionInfo> ris =
1233 this.assignmentManager.getRegionStates().getRegionsOfTable(TableName.NAMESPACE_TABLE_NAME);
1234 if (ris.isEmpty()) {
1235 // maybe this will not happen any more, but anyway, no harm to add a check here...
1236 return true;
1238 // Else there are namespace regions up in meta. Ensure they are assigned before we go on.
1239 for (RegionInfo ri : ris) {
1240 isRegionOnline(ri);
1242 return true;
1246 * Adds the {@code MasterQuotasObserver} to the list of configured Master observers to
1247 * automatically remove quotas for a table when that table is deleted.
1249 @VisibleForTesting
1250 public void updateConfigurationForQuotasObserver(Configuration conf) {
1251 // We're configured to not delete quotas on table deletion, so we don't need to add the obs.
1252 if (!conf.getBoolean(
1253 MasterQuotasObserver.REMOVE_QUOTA_ON_TABLE_DELETE,
1254 MasterQuotasObserver.REMOVE_QUOTA_ON_TABLE_DELETE_DEFAULT)) {
1255 return;
1257 String[] masterCoprocs = conf.getStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
1258 final int length = null == masterCoprocs ? 0 : masterCoprocs.length;
1259 String[] updatedCoprocs = new String[length + 1];
1260 if (length > 0) {
1261 System.arraycopy(masterCoprocs, 0, updatedCoprocs, 0, masterCoprocs.length);
1263 updatedCoprocs[length] = MasterQuotasObserver.class.getName();
1264 conf.setStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, updatedCoprocs);
1267 private void initMobCleaner() {
1268 this.expiredMobFileCleanerChore = new ExpiredMobFileCleanerChore(this);
1269 getChoreService().scheduleChore(expiredMobFileCleanerChore);
1271 int mobCompactionPeriod = conf.getInt(MobConstants.MOB_COMPACTION_CHORE_PERIOD,
1272 MobConstants.DEFAULT_MOB_COMPACTION_CHORE_PERIOD);
1273 this.mobCompactChore = new MobCompactionChore(this, mobCompactionPeriod);
1274 getChoreService().scheduleChore(mobCompactChore);
1275 this.mobCompactThread = new MasterMobCompactionThread(this);
1279 * <p>
1280 * Create a {@link MasterMetaBootstrap} instance.
1281 * </p>
1282 * <p>
1283 * Will be overridden in tests.
1284 * </p>
1286 @VisibleForTesting
1287 protected MasterMetaBootstrap createMetaBootstrap() {
1288 // We put this out here in a method so can do a Mockito.spy and stub it out
1289 // w/ a mocked up MasterMetaBootstrap.
1290 return new MasterMetaBootstrap(this);
1294 * <p>
1295 * Create a {@link ServerManager} instance.
1296 * </p>
1297 * <p>
1298 * Will be overridden in tests.
1299 * </p>
1301 @VisibleForTesting
1302 protected ServerManager createServerManager(final MasterServices master) throws IOException {
1303 // We put this out here in a method so can do a Mockito.spy and stub it out
1304 // w/ a mocked up ServerManager.
1305 setupClusterConnection();
1306 return new ServerManager(master);
1309 private void waitForRegionServers(final MonitoredTask status)
1310 throws IOException, InterruptedException {
1311 this.serverManager.waitForRegionServers(status);
1314 // Will be overridden in tests
1315 @VisibleForTesting
1316 protected void initClusterSchemaService() throws IOException, InterruptedException {
1317 this.clusterSchemaService = new ClusterSchemaServiceImpl(this);
1318 this.clusterSchemaService.startAsync();
1319 try {
1320 this.clusterSchemaService.awaitRunning(getConfiguration().getInt(
1321 HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS,
1322 DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS);
1323 } catch (TimeoutException toe) {
1324 throw new IOException("Timedout starting ClusterSchemaService", toe);
1328 private void initQuotaManager() throws IOException {
1329 MasterQuotaManager quotaManager = new MasterQuotaManager(this);
1330 quotaManager.start();
1331 this.quotaManager = quotaManager;
1334 private SpaceQuotaSnapshotNotifier createQuotaSnapshotNotifier() {
1335 SpaceQuotaSnapshotNotifier notifier =
1336 SpaceQuotaSnapshotNotifierFactory.getInstance().create(getConfiguration());
1337 return notifier;
1340 boolean isCatalogJanitorEnabled() {
1341 return catalogJanitorChore != null ?
1342 catalogJanitorChore.getEnabled() : false;
1345 boolean isCleanerChoreEnabled() {
1346 boolean hfileCleanerFlag = true, logCleanerFlag = true;
1348 if (hfileCleaner != null) {
1349 hfileCleanerFlag = hfileCleaner.getEnabled();
1352 if (logCleaner != null) {
1353 logCleanerFlag = logCleaner.getEnabled();
1356 return (hfileCleanerFlag && logCleanerFlag);
1359 @Override
1360 public ServerManager getServerManager() {
1361 return this.serverManager;
1364 @Override
1365 public MasterFileSystem getMasterFileSystem() {
1366 return this.fileSystemManager;
1369 @Override
1370 public MasterWalManager getMasterWalManager() {
1371 return this.walManager;
1374 @Override
1375 public SplitWALManager getSplitWALManager() {
1376 return splitWALManager;
1379 @Override
1380 public TableStateManager getTableStateManager() {
1381 return tableStateManager;
1385 * Start up all services. If any of these threads gets an unhandled exception
1386 * then they just die with a logged message. This should be fine because
1387 * in general, we do not expect the master to get such unhandled exceptions
1388 * as OOMEs; it should be lightly loaded. See what HRegionServer does if
1389 * need to install an unexpected exception handler.
1391 private void startServiceThreads() throws IOException {
1392 // Start the executor service pools
1393 this.executorService.startExecutorService(ExecutorType.MASTER_OPEN_REGION, conf.getInt(
1394 HConstants.MASTER_OPEN_REGION_THREADS, HConstants.MASTER_OPEN_REGION_THREADS_DEFAULT));
1395 this.executorService.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, conf.getInt(
1396 HConstants.MASTER_CLOSE_REGION_THREADS, HConstants.MASTER_CLOSE_REGION_THREADS_DEFAULT));
1397 this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
1398 conf.getInt(HConstants.MASTER_SERVER_OPERATIONS_THREADS,
1399 HConstants.MASTER_SERVER_OPERATIONS_THREADS_DEFAULT));
1400 this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
1401 conf.getInt(HConstants.MASTER_META_SERVER_OPERATIONS_THREADS,
1402 HConstants.MASTER_META_SERVER_OPERATIONS_THREADS_DEFAULT));
1403 this.executorService.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS, conf.getInt(
1404 HConstants.MASTER_LOG_REPLAY_OPS_THREADS, HConstants.MASTER_LOG_REPLAY_OPS_THREADS_DEFAULT));
1405 this.executorService.startExecutorService(ExecutorType.MASTER_SNAPSHOT_OPERATIONS, conf.getInt(
1406 SnapshotManager.SNAPSHOT_POOL_THREADS_KEY, SnapshotManager.SNAPSHOT_POOL_THREADS_DEFAULT));
1408 // We depend on there being only one instance of this executor running
1409 // at a time. To do concurrency, would need fencing of enable/disable of
1410 // tables.
1411 // Any time changing this maxThreads to > 1, pls see the comment at
1412 // AccessController#postCompletedCreateTableAction
1413 this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
1414 startProcedureExecutor();
1416 // Start log cleaner thread
1417 int cleanerInterval =
1418 conf.getInt(HBASE_MASTER_CLEANER_INTERVAL, DEFAULT_HBASE_MASTER_CLEANER_INTERVAL);
1419 this.logCleaner = new LogCleaner(cleanerInterval, this, conf,
1420 getMasterWalManager().getFileSystem(), getMasterWalManager().getOldLogDir(), cleanerPool);
1421 getChoreService().scheduleChore(logCleaner);
1423 // start the hfile archive cleaner thread
1424 Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
1425 Map<String, Object> params = new HashMap<>();
1426 params.put(MASTER, this);
1427 this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf,
1428 getMasterFileSystem().getFileSystem(), archiveDir, cleanerPool, params);
1429 getChoreService().scheduleChore(hfileCleaner);
1431 // Regions Reopen based on very high storeFileRefCount is considered enabled
1432 // only if hbase.regions.recovery.store.file.ref.count has value > 0
1433 final int maxStoreFileRefCount = conf.getInt(
1434 HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
1435 HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
1436 if (maxStoreFileRefCount > 0) {
1437 this.regionsRecoveryChore = new RegionsRecoveryChore(this, conf, this);
1438 getChoreService().scheduleChore(this.regionsRecoveryChore);
1439 } else {
1440 LOG.info("Reopening regions with very high storeFileRefCount is disabled. " +
1441 "Provide threshold value > 0 for {} to enable it.",
1442 HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
1445 this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this);
1447 replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
1448 replicationPeerManager);
1449 getChoreService().scheduleChore(replicationBarrierCleaner);
1451 final boolean isSnapshotChoreEnabled = this.snapshotCleanupTracker
1452 .isSnapshotCleanupEnabled();
1453 this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager());
1454 if (isSnapshotChoreEnabled) {
1455 getChoreService().scheduleChore(this.snapshotCleanerChore);
1456 } else {
1457 if (LOG.isTraceEnabled()) {
1458 LOG.trace("Snapshot Cleaner Chore is disabled. Not starting up the chore..");
1461 serviceStarted = true;
1462 if (LOG.isTraceEnabled()) {
1463 LOG.trace("Started service threads");
1467 @Override
1468 protected void stopServiceThreads() {
1469 if (masterJettyServer != null) {
1470 LOG.info("Stopping master jetty server");
1471 try {
1472 masterJettyServer.stop();
1473 } catch (Exception e) {
1474 LOG.error("Failed to stop master jetty server", e);
1477 stopChores();
1478 if (this.mobCompactThread != null) {
1479 this.mobCompactThread.close();
1481 super.stopServiceThreads();
1482 if (cleanerPool != null) {
1483 cleanerPool.shutdownNow();
1484 cleanerPool = null;
1487 LOG.debug("Stopping service threads");
1489 if (this.quotaManager != null) {
1490 this.quotaManager.stop();
1493 if (this.activeMasterManager != null) {
1494 this.activeMasterManager.stop();
1496 if (this.serverManager != null) {
1497 this.serverManager.stop();
1499 if (this.assignmentManager != null) {
1500 this.assignmentManager.stop();
1503 stopProcedureExecutor();
1505 if (this.walManager != null) {
1506 this.walManager.stop();
1508 if (this.fileSystemManager != null) {
1509 this.fileSystemManager.stop();
1511 if (this.mpmHost != null) {
1512 this.mpmHost.stop("server shutting down.");
1514 if (this.regionServerTracker != null) {
1515 this.regionServerTracker.stop();
1519 private void createProcedureExecutor() throws IOException {
1520 MasterProcedureEnv procEnv = new MasterProcedureEnv(this);
1521 // Create cleaner thread pool
1522 cleanerPool = new DirScanPool(conf);
1523 procedureStore = new RegionProcedureStore(this, cleanerPool,
1524 new MasterProcedureEnv.FsUtilsLeaseRecovery(this));
1525 procedureStore.registerListener(new ProcedureStoreListener() {
1527 @Override
1528 public void abortProcess() {
1529 abort("The Procedure Store lost the lease", null);
1532 MasterProcedureScheduler procedureScheduler = procEnv.getProcedureScheduler();
1533 procedureExecutor = new ProcedureExecutor<>(conf, procEnv, procedureStore, procedureScheduler);
1534 configurationManager.registerObserver(procEnv);
1536 int cpus = Runtime.getRuntime().availableProcessors();
1537 final int numThreads = conf.getInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, Math.max(
1538 (cpus > 0 ? cpus / 4 : 0), MasterProcedureConstants.DEFAULT_MIN_MASTER_PROCEDURE_THREADS));
1539 final boolean abortOnCorruption =
1540 conf.getBoolean(MasterProcedureConstants.EXECUTOR_ABORT_ON_CORRUPTION,
1541 MasterProcedureConstants.DEFAULT_EXECUTOR_ABORT_ON_CORRUPTION);
1542 procedureStore.start(numThreads);
1543 // Just initialize it but do not start the workers, we will start the workers later by calling
1544 // startProcedureExecutor. See the javadoc for finishActiveMasterInitialization for more
1545 // details.
1546 procedureExecutor.init(numThreads, abortOnCorruption);
1547 procEnv.getRemoteDispatcher().start();
1550 private void startProcedureExecutor() throws IOException {
1551 procedureExecutor.startWorkers();
1555 * Turn on/off Snapshot Cleanup Chore
1557 * @param on indicates whether Snapshot Cleanup Chore is to be run
1559 void switchSnapshotCleanup(final boolean on, final boolean synchronous) {
1560 if (synchronous) {
1561 synchronized (this.snapshotCleanerChore) {
1562 switchSnapshotCleanup(on);
1564 } else {
1565 switchSnapshotCleanup(on);
1569 private void switchSnapshotCleanup(final boolean on) {
1570 try {
1571 snapshotCleanupTracker.setSnapshotCleanupEnabled(on);
1572 if (on) {
1573 if (!getChoreService().isChoreScheduled(this.snapshotCleanerChore)) {
1574 getChoreService().scheduleChore(this.snapshotCleanerChore);
1576 } else {
1577 getChoreService().cancelChore(this.snapshotCleanerChore);
1579 } catch (KeeperException e) {
1580 LOG.error("Error updating snapshot cleanup mode to {}", on, e);
1585 private void stopProcedureExecutor() {
1586 if (procedureExecutor != null) {
1587 configurationManager.deregisterObserver(procedureExecutor.getEnvironment());
1588 procedureExecutor.getEnvironment().getRemoteDispatcher().stop();
1589 procedureExecutor.stop();
1590 procedureExecutor.join();
1591 procedureExecutor = null;
1594 if (procedureStore != null) {
1595 procedureStore.stop(isAborted());
1596 procedureStore = null;
1600 private void stopChores() {
1601 ChoreService choreService = getChoreService();
1602 if (choreService != null) {
1603 choreService.cancelChore(this.expiredMobFileCleanerChore);
1604 choreService.cancelChore(this.mobCompactChore);
1605 choreService.cancelChore(this.balancerChore);
1606 choreService.cancelChore(this.normalizerChore);
1607 choreService.cancelChore(this.clusterStatusChore);
1608 choreService.cancelChore(this.catalogJanitorChore);
1609 choreService.cancelChore(this.clusterStatusPublisherChore);
1610 choreService.cancelChore(this.snapshotQuotaChore);
1611 choreService.cancelChore(this.logCleaner);
1612 choreService.cancelChore(this.hfileCleaner);
1613 choreService.cancelChore(this.replicationBarrierCleaner);
1614 choreService.cancelChore(this.snapshotCleanerChore);
1615 choreService.cancelChore(this.hbckChore);
1616 choreService.cancelChore(this.regionsRecoveryChore);
1621 * @return Get remote side's InetAddress
1623 InetAddress getRemoteInetAddress(final int port,
1624 final long serverStartCode) throws UnknownHostException {
1625 // Do it out here in its own little method so can fake an address when
1626 // mocking up in tests.
1627 InetAddress ia = RpcServer.getRemoteIp();
1629 // The call could be from the local regionserver,
1630 // in which case, there is no remote address.
1631 if (ia == null && serverStartCode == startcode) {
1632 InetSocketAddress isa = rpcServices.getSocketAddress();
1633 if (isa != null && isa.getPort() == port) {
1634 ia = isa.getAddress();
1637 return ia;
1641 * @return Maximum time we should run balancer for
1643 private int getMaxBalancingTime() {
1644 // if max balancing time isn't set, defaulting it to period time
1645 int maxBalancingTime = getConfiguration().getInt(HConstants.HBASE_BALANCER_MAX_BALANCING,
1646 getConfiguration()
1647 .getInt(HConstants.HBASE_BALANCER_PERIOD, HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
1648 return maxBalancingTime;
1652 * @return Maximum number of regions in transition
1654 private int getMaxRegionsInTransition() {
1655 int numRegions = this.assignmentManager.getRegionStates().getRegionAssignments().size();
1656 return Math.max((int) Math.floor(numRegions * this.maxRitPercent), 1);
1660 * It first sleep to the next balance plan start time. Meanwhile, throttling by the max
1661 * number regions in transition to protect availability.
1662 * @param nextBalanceStartTime The next balance plan start time
1663 * @param maxRegionsInTransition max number of regions in transition
1664 * @param cutoffTime when to exit balancer
1666 private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransition,
1667 long cutoffTime) {
1668 boolean interrupted = false;
1670 // Sleep to next balance plan start time
1671 // But if there are zero regions in transition, it can skip sleep to speed up.
1672 while (!interrupted && System.currentTimeMillis() < nextBalanceStartTime
1673 && this.assignmentManager.getRegionStates().hasRegionsInTransition()) {
1674 try {
1675 Thread.sleep(100);
1676 } catch (InterruptedException ie) {
1677 interrupted = true;
1681 // Throttling by max number regions in transition
1682 while (!interrupted
1683 && maxRegionsInTransition > 0
1684 && this.assignmentManager.getRegionStates().getRegionsInTransitionCount()
1685 >= maxRegionsInTransition && System.currentTimeMillis() <= cutoffTime) {
1686 try {
1687 // sleep if the number of regions in transition exceeds the limit
1688 Thread.sleep(100);
1689 } catch (InterruptedException ie) {
1690 interrupted = true;
1694 if (interrupted) Thread.currentThread().interrupt();
1697 public boolean balance() throws IOException {
1698 return balance(false);
1701 public boolean balance(boolean force) throws IOException {
1702 // if master not initialized, don't run balancer.
1703 if (!isInitialized()) {
1704 LOG.debug("Master has not been initialized, don't run balancer.");
1705 return false;
1708 if (isInMaintenanceMode()) {
1709 LOG.info("Master is in maintenanceMode mode, don't run balancer.");
1710 return false;
1713 synchronized (this.balancer) {
1714 // If balance not true, don't run balancer.
1715 if (!this.loadBalancerTracker.isBalancerOn()) return false;
1716 // Only allow one balance run at at time.
1717 if (this.assignmentManager.hasRegionsInTransition()) {
1718 List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
1719 // if hbase:meta region is in transition, result of assignment cannot be recorded
1720 // ignore the force flag in that case
1721 boolean metaInTransition = assignmentManager.isMetaRegionInTransition();
1722 String prefix = force && !metaInTransition ? "R" : "Not r";
1723 List<RegionStateNode> toPrint = regionsInTransition;
1724 int max = 5;
1725 boolean truncated = false;
1726 if (regionsInTransition.size() > max) {
1727 toPrint = regionsInTransition.subList(0, max);
1728 truncated = true;
1730 LOG.info(prefix + "unning balancer because " + regionsInTransition.size() +
1731 " region(s) in transition: " + toPrint + (truncated? "(truncated list)": ""));
1732 if (!force || metaInTransition) return false;
1734 if (this.serverManager.areDeadServersInProgress()) {
1735 LOG.info("Not running balancer because processing dead regionserver(s): " +
1736 this.serverManager.getDeadServers());
1737 return false;
1740 if (this.cpHost != null) {
1741 try {
1742 if (this.cpHost.preBalance()) {
1743 LOG.debug("Coprocessor bypassing balancer request");
1744 return false;
1746 } catch (IOException ioe) {
1747 LOG.error("Error invoking master coprocessor preBalance()", ioe);
1748 return false;
1752 boolean isByTable = getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false);
1753 Map<TableName, Map<ServerName, List<RegionInfo>>> assignments =
1754 this.assignmentManager.getRegionStates()
1755 .getAssignmentsForBalancer(tableStateManager, this.serverManager.getOnlineServersList(),
1756 isByTable);
1757 for (Map<ServerName, List<RegionInfo>> serverMap : assignments.values()) {
1758 serverMap.keySet().removeAll(this.serverManager.getDrainingServersList());
1761 //Give the balancer the current cluster state.
1762 this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());
1763 this.balancer.setClusterLoad(assignments);
1765 List<RegionPlan> plans = new ArrayList<>();
1766 for (Entry<TableName, Map<ServerName, List<RegionInfo>>> e : assignments.entrySet()) {
1767 List<RegionPlan> partialPlans = this.balancer.balanceCluster(e.getKey(), e.getValue());
1768 if (partialPlans != null) {
1769 plans.addAll(partialPlans);
1773 List<RegionPlan> sucRPs = executeRegionPlansWithThrottling(plans);
1775 if (this.cpHost != null) {
1776 try {
1777 this.cpHost.postBalance(sucRPs);
1778 } catch (IOException ioe) {
1779 // balancing already succeeded so don't change the result
1780 LOG.error("Error invoking master coprocessor postBalance()", ioe);
1784 // If LoadBalancer did not generate any plans, it means the cluster is already balanced.
1785 // Return true indicating a success.
1786 return true;
1789 public List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans) {
1790 List<RegionPlan> sucRPs = new ArrayList<>();
1791 int maxRegionsInTransition = getMaxRegionsInTransition();
1792 long balanceStartTime = System.currentTimeMillis();
1793 long cutoffTime = balanceStartTime + this.maxBlancingTime;
1794 int rpCount = 0; // number of RegionPlans balanced so far
1795 if (plans != null && !plans.isEmpty()) {
1796 int balanceInterval = this.maxBlancingTime / plans.size();
1797 LOG.info("Balancer plans size is " + plans.size() + ", the balance interval is "
1798 + balanceInterval + " ms, and the max number regions in transition is "
1799 + maxRegionsInTransition);
1801 for (RegionPlan plan: plans) {
1802 LOG.info("balance " + plan);
1803 //TODO: bulk assign
1804 try {
1805 this.assignmentManager.moveAsync(plan);
1806 } catch (HBaseIOException hioe) {
1807 //should ignore failed plans here, avoiding the whole balance plans be aborted
1808 //later calls of balance() can fetch up the failed and skipped plans
1809 LOG.warn("Failed balance plan {}, skipping...", plan, hioe);
1811 //rpCount records balance plans processed, does not care if a plan succeeds
1812 rpCount++;
1814 if (this.maxBlancingTime > 0) {
1815 balanceThrottling(balanceStartTime + rpCount * balanceInterval, maxRegionsInTransition,
1816 cutoffTime);
1819 // if performing next balance exceeds cutoff time, exit the loop
1820 if (this.maxBlancingTime > 0 && rpCount < plans.size()
1821 && System.currentTimeMillis() > cutoffTime) {
1822 // TODO: After balance, there should not be a cutoff time (keeping it as
1823 // a security net for now)
1824 LOG.debug("No more balancing till next balance run; maxBalanceTime="
1825 + this.maxBlancingTime);
1826 break;
1830 return sucRPs;
1833 @Override
1834 @VisibleForTesting
1835 public RegionNormalizer getRegionNormalizer() {
1836 return this.normalizer;
1840 * Perform normalization of cluster (invoked by {@link RegionNormalizerChore}).
1842 * @return true if normalization step was performed successfully, false otherwise
1843 * (specifically, if HMaster hasn't been initialized properly or normalization
1844 * is globally disabled)
1846 public boolean normalizeRegions() throws IOException {
1847 if (!isInitialized()) {
1848 LOG.debug("Master has not been initialized, don't run region normalizer.");
1849 return false;
1851 if (this.getServerManager().isClusterShutdown()) {
1852 LOG.info("Cluster is shutting down, don't run region normalizer.");
1853 return false;
1855 if (isInMaintenanceMode()) {
1856 LOG.info("Master is in maintenance mode, don't run region normalizer.");
1857 return false;
1859 if (!this.regionNormalizerTracker.isNormalizerOn()) {
1860 LOG.debug("Region normalization is disabled, don't run region normalizer.");
1861 return false;
1864 synchronized (this.normalizer) {
1865 // Don't run the normalizer concurrently
1866 List<TableName> allEnabledTables = new ArrayList<>(
1867 this.tableStateManager.getTablesInStates(TableState.State.ENABLED));
1869 Collections.shuffle(allEnabledTables);
1871 for (TableName table : allEnabledTables) {
1872 if (isInMaintenanceMode()) {
1873 LOG.debug("Master is in maintenance mode, stop running region normalizer.");
1874 return false;
1877 TableDescriptor tblDesc = getTableDescriptors().get(table);
1878 if (table.isSystemTable() || (tblDesc != null &&
1879 !tblDesc.isNormalizationEnabled())) {
1880 LOG.trace("Skipping normalization for {}, as it's either system"
1881 + " table or doesn't have auto normalization turned on", table);
1882 continue;
1884 List<NormalizationPlan> plans = this.normalizer.computePlanForTable(table);
1885 if (plans != null) {
1886 for (NormalizationPlan plan : plans) {
1887 plan.execute(asyncClusterConnection.toConnection().getAdmin());
1888 if (plan.getType() == PlanType.SPLIT) {
1889 splitPlanCount++;
1890 } else if (plan.getType() == PlanType.MERGE) {
1891 mergePlanCount++;
1897 // If Region did not generate any plans, it means the cluster is already balanced.
1898 // Return true indicating a success.
1899 return true;
1903 * @return Client info for use as prefix on an audit log string; who did an action
1905 @Override
1906 public String getClientIdAuditPrefix() {
1907 return "Client=" + RpcServer.getRequestUserName().orElse(null)
1908 + "/" + RpcServer.getRemoteAddress().orElse(null);
1912 * Switch for the background CatalogJanitor thread.
1913 * Used for testing. The thread will continue to run. It will just be a noop
1914 * if disabled.
1915 * @param b If false, the catalog janitor won't do anything.
1917 public void setCatalogJanitorEnabled(final boolean b) {
1918 this.catalogJanitorChore.setEnabled(b);
1921 @Override
1922 public long mergeRegions(
1923 final RegionInfo[] regionsToMerge,
1924 final boolean forcible,
1925 final long ng,
1926 final long nonce) throws IOException {
1927 checkInitialized();
1929 if (!isSplitOrMergeEnabled(MasterSwitchType.MERGE)) {
1930 String regionsStr = Arrays.deepToString(regionsToMerge);
1931 LOG.warn("Merge switch is off! skip merge of " + regionsStr);
1932 throw new IOException("Merge of " + regionsStr + " failed because merge switch is off");
1935 final String mergeRegionsStr = Arrays.stream(regionsToMerge).
1936 map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", "));
1937 return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, ng, nonce) {
1938 @Override
1939 protected void run() throws IOException {
1940 getMaster().getMasterCoprocessorHost().preMergeRegions(regionsToMerge);
1941 String aid = getClientIdAuditPrefix();
1942 LOG.info("{} merge regions {}", aid, mergeRegionsStr);
1943 submitProcedure(new MergeTableRegionsProcedure(procedureExecutor.getEnvironment(),
1944 regionsToMerge, forcible));
1945 getMaster().getMasterCoprocessorHost().postMergeRegions(regionsToMerge);
1948 @Override
1949 protected String getDescription() {
1950 return "MergeTableProcedure";
1955 @Override
1956 public long splitRegion(final RegionInfo regionInfo, final byte[] splitRow,
1957 final long nonceGroup, final long nonce)
1958 throws IOException {
1959 checkInitialized();
1961 if (!isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
1962 LOG.warn("Split switch is off! skip split of " + regionInfo);
1963 throw new IOException("Split region " + regionInfo.getRegionNameAsString() +
1964 " failed due to split switch off");
1967 return MasterProcedureUtil.submitProcedure(
1968 new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
1969 @Override
1970 protected void run() throws IOException {
1971 getMaster().getMasterCoprocessorHost().preSplitRegion(regionInfo.getTable(), splitRow);
1972 LOG.info(getClientIdAuditPrefix() + " split " + regionInfo.getRegionNameAsString());
1974 // Execute the operation asynchronously
1975 submitProcedure(getAssignmentManager().createSplitProcedure(regionInfo, splitRow));
1978 @Override
1979 protected String getDescription() {
1980 return "SplitTableProcedure";
1985 private void warmUpRegion(ServerName server, RegionInfo region) {
1986 FutureUtils.addListener(asyncClusterConnection.getRegionServerAdmin(server)
1987 .warmupRegion(RequestConverter.buildWarmupRegionRequest(region)), (r, e) -> {
1988 if (e != null) {
1989 LOG.warn("Failed to warm up region {} on server {}", region, server, e);
1994 // Public so can be accessed by tests. Blocks until move is done.
1995 // Replace with an async implementation from which you can get
1996 // a success/failure result.
1997 @VisibleForTesting
1998 public void move(final byte[] encodedRegionName, byte[] destServerName) throws HBaseIOException {
1999 RegionState regionState = assignmentManager.getRegionStates().
2000 getRegionState(Bytes.toString(encodedRegionName));
2002 RegionInfo hri;
2003 if (regionState != null) {
2004 hri = regionState.getRegion();
2005 } else {
2006 throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
2009 ServerName dest;
2010 List<ServerName> exclude = hri.getTable().isSystemTable() ? assignmentManager.getExcludedServersForSystemTable()
2011 : new ArrayList<>(1);
2012 if (destServerName != null && exclude.contains(ServerName.valueOf(Bytes.toString(destServerName)))) {
2013 LOG.info(
2014 Bytes.toString(encodedRegionName) + " can not move to " + Bytes.toString(destServerName)
2015 + " because the server is in exclude list");
2016 destServerName = null;
2018 if (destServerName == null || destServerName.length == 0) {
2019 LOG.info("Passed destination servername is null/empty so " +
2020 "choosing a server at random");
2021 exclude.add(regionState.getServerName());
2022 final List<ServerName> destServers = this.serverManager.createDestinationServersList(exclude);
2023 dest = balancer.randomAssignment(hri, destServers);
2024 if (dest == null) {
2025 LOG.debug("Unable to determine a plan to assign " + hri);
2026 return;
2028 } else {
2029 ServerName candidate = ServerName.valueOf(Bytes.toString(destServerName));
2030 dest = balancer.randomAssignment(hri, Lists.newArrayList(candidate));
2031 if (dest == null) {
2032 LOG.debug("Unable to determine a plan to assign " + hri);
2033 return;
2035 // TODO: What is this? I don't get it.
2036 if (dest.equals(serverName) && balancer instanceof BaseLoadBalancer
2037 && !((BaseLoadBalancer)balancer).shouldBeOnMaster(hri)) {
2038 // To avoid unnecessary region moving later by balancer. Don't put user
2039 // regions on master.
2040 LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
2041 + " to avoid unnecessary region moving later by load balancer,"
2042 + " because it should not be on master");
2043 return;
2047 if (dest.equals(regionState.getServerName())) {
2048 LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
2049 + " because region already assigned to the same server " + dest + ".");
2050 return;
2053 // Now we can do the move
2054 RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);
2055 assert rp.getDestination() != null: rp.toString() + " " + dest;
2057 try {
2058 checkInitialized();
2059 if (this.cpHost != null) {
2060 this.cpHost.preMove(hri, rp.getSource(), rp.getDestination());
2063 TransitRegionStateProcedure proc =
2064 this.assignmentManager.createMoveRegionProcedure(rp.getRegionInfo(), rp.getDestination());
2065 // Warmup the region on the destination before initiating the move.
2066 // A region server could reject the close request because it either does not
2067 // have the specified region or the region is being split.
2068 warmUpRegion(rp.getDestination(), hri);
2070 LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
2071 Future<byte[]> future = ProcedureSyncWait.submitProcedure(this.procedureExecutor, proc);
2072 try {
2073 // Is this going to work? Will we throw exception on error?
2074 // TODO: CompletableFuture rather than this stunted Future.
2075 future.get();
2076 } catch (InterruptedException | ExecutionException e) {
2077 throw new HBaseIOException(e);
2079 if (this.cpHost != null) {
2080 this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
2082 } catch (IOException ioe) {
2083 if (ioe instanceof HBaseIOException) {
2084 throw (HBaseIOException)ioe;
2086 throw new HBaseIOException(ioe);
2090 @Override
2091 public long createTable(final TableDescriptor tableDescriptor, final byte[][] splitKeys,
2092 final long nonceGroup, final long nonce) throws IOException {
2093 checkInitialized();
2094 TableDescriptor desc = getMasterCoprocessorHost().preCreateTableRegionsInfos(tableDescriptor);
2095 if (desc == null) {
2096 throw new IOException("Creation for " + tableDescriptor + " is canceled by CP");
2098 String namespace = desc.getTableName().getNamespaceAsString();
2099 this.clusterSchemaService.getNamespace(namespace);
2101 RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(desc, splitKeys);
2102 TableDescriptorChecker.sanityCheck(conf, desc);
2104 return MasterProcedureUtil
2105 .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2106 @Override
2107 protected void run() throws IOException {
2108 getMaster().getMasterCoprocessorHost().preCreateTable(desc, newRegions);
2110 LOG.info(getClientIdAuditPrefix() + " create " + desc);
2112 // TODO: We can handle/merge duplicate requests, and differentiate the case of
2113 // TableExistsException by saying if the schema is the same or not.
2115 // We need to wait for the procedure to potentially fail due to "prepare" sanity
2116 // checks. This will block only the beginning of the procedure. See HBASE-19953.
2117 ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2118 submitProcedure(
2119 new CreateTableProcedure(procedureExecutor.getEnvironment(), desc, newRegions, latch));
2120 latch.await();
2122 getMaster().getMasterCoprocessorHost().postCreateTable(desc, newRegions);
2125 @Override
2126 protected String getDescription() {
2127 return "CreateTableProcedure";
2132 @Override
2133 public long createSystemTable(final TableDescriptor tableDescriptor) throws IOException {
2134 if (isStopped()) {
2135 throw new MasterNotRunningException();
2138 TableName tableName = tableDescriptor.getTableName();
2139 if (!(tableName.isSystemTable())) {
2140 throw new IllegalArgumentException(
2141 "Only system table creation can use this createSystemTable API");
2144 RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(tableDescriptor, null);
2146 LOG.info(getClientIdAuditPrefix() + " create " + tableDescriptor);
2148 // This special create table is called locally to master. Therefore, no RPC means no need
2149 // to use nonce to detect duplicated RPC call.
2150 long procId = this.procedureExecutor.submitProcedure(
2151 new CreateTableProcedure(procedureExecutor.getEnvironment(), tableDescriptor, newRegions));
2153 return procId;
2156 private void startActiveMasterManager(int infoPort) throws KeeperException {
2157 String backupZNode = ZNodePaths.joinZNode(
2158 zooKeeper.getZNodePaths().backupMasterAddressesZNode, serverName.toString());
2160 * Add a ZNode for ourselves in the backup master directory since we
2161 * may not become the active master. If so, we want the actual active
2162 * master to know we are backup masters, so that it won't assign
2163 * regions to us if so configured.
2165 * If we become the active master later, ActiveMasterManager will delete
2166 * this node explicitly. If we crash before then, ZooKeeper will delete
2167 * this node for us since it is ephemeral.
2169 LOG.info("Adding backup master ZNode " + backupZNode);
2170 if (!MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName, infoPort)) {
2171 LOG.warn("Failed create of " + backupZNode + " by " + serverName);
2173 this.activeMasterManager.setInfoPort(infoPort);
2174 int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT, HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
2175 // If we're a backup master, stall until a primary to write this address
2176 if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP, HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
2177 LOG.debug("HMaster started in backup mode. Stalling until master znode is written.");
2178 // This will only be a minute or so while the cluster starts up,
2179 // so don't worry about setting watches on the parent znode
2180 while (!activeMasterManager.hasActiveMaster()) {
2181 LOG.debug("Waiting for master address and cluster state znode to be written.");
2182 Threads.sleep(timeout);
2185 MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
2186 status.setDescription("Master startup");
2187 try {
2188 if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
2189 finishActiveMasterInitialization(status);
2191 } catch (Throwable t) {
2192 status.setStatus("Failed to become active: " + t.getMessage());
2193 LOG.error(HBaseMarkers.FATAL, "Failed to become active master", t);
2194 // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
2195 if (t instanceof NoClassDefFoundError && t.getMessage().
2196 contains("org/apache/hadoop/hdfs/protocol/HdfsConstants$SafeModeAction")) {
2197 // improved error message for this special case
2198 abort("HBase is having a problem with its Hadoop jars. You may need to recompile " +
2199 "HBase against Hadoop version " + org.apache.hadoop.util.VersionInfo.getVersion() +
2200 " or change your hadoop jars to start properly", t);
2201 } else {
2202 abort("Unhandled exception. Starting shutdown.", t);
2204 } finally {
2205 status.cleanup();
2209 private static boolean isCatalogTable(final TableName tableName) {
2210 return tableName.equals(TableName.META_TABLE_NAME);
2213 @Override
2214 public long deleteTable(
2215 final TableName tableName,
2216 final long nonceGroup,
2217 final long nonce) throws IOException {
2218 checkInitialized();
2220 return MasterProcedureUtil.submitProcedure(
2221 new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2222 @Override
2223 protected void run() throws IOException {
2224 getMaster().getMasterCoprocessorHost().preDeleteTable(tableName);
2226 LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
2228 // TODO: We can handle/merge duplicate request
2230 // We need to wait for the procedure to potentially fail due to "prepare" sanity
2231 // checks. This will block only the beginning of the procedure. See HBASE-19953.
2232 ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2233 submitProcedure(new DeleteTableProcedure(procedureExecutor.getEnvironment(),
2234 tableName, latch));
2235 latch.await();
2237 getMaster().getMasterCoprocessorHost().postDeleteTable(tableName);
2240 @Override
2241 protected String getDescription() {
2242 return "DeleteTableProcedure";
2247 @Override
2248 public long truncateTable(
2249 final TableName tableName,
2250 final boolean preserveSplits,
2251 final long nonceGroup,
2252 final long nonce) throws IOException {
2253 checkInitialized();
2255 return MasterProcedureUtil.submitProcedure(
2256 new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2257 @Override
2258 protected void run() throws IOException {
2259 getMaster().getMasterCoprocessorHost().preTruncateTable(tableName);
2261 LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
2262 ProcedurePrepareLatch latch = ProcedurePrepareLatch.createLatch(2, 0);
2263 submitProcedure(new TruncateTableProcedure(procedureExecutor.getEnvironment(),
2264 tableName, preserveSplits, latch));
2265 latch.await();
2267 getMaster().getMasterCoprocessorHost().postTruncateTable(tableName);
2270 @Override
2271 protected String getDescription() {
2272 return "TruncateTableProcedure";
2277 @Override
2278 public long addColumn(final TableName tableName, final ColumnFamilyDescriptor column,
2279 final long nonceGroup, final long nonce) throws IOException {
2280 checkInitialized();
2281 checkTableExists(tableName);
2283 return modifyTable(tableName, new TableDescriptorGetter() {
2285 @Override
2286 public TableDescriptor get() throws IOException {
2287 TableDescriptor old = getTableDescriptors().get(tableName);
2288 if (old.hasColumnFamily(column.getName())) {
2289 throw new InvalidFamilyOperationException("Column family '" + column.getNameAsString()
2290 + "' in table '" + tableName + "' already exists so cannot be added");
2293 return TableDescriptorBuilder.newBuilder(old).setColumnFamily(column).build();
2295 }, nonceGroup, nonce, true);
2299 * Implement to return TableDescriptor after pre-checks
2301 protected interface TableDescriptorGetter {
2302 TableDescriptor get() throws IOException;
2305 @Override
2306 public long modifyColumn(final TableName tableName, final ColumnFamilyDescriptor descriptor,
2307 final long nonceGroup, final long nonce) throws IOException {
2308 checkInitialized();
2309 checkTableExists(tableName);
2310 return modifyTable(tableName, new TableDescriptorGetter() {
2312 @Override
2313 public TableDescriptor get() throws IOException {
2314 TableDescriptor old = getTableDescriptors().get(tableName);
2315 if (!old.hasColumnFamily(descriptor.getName())) {
2316 throw new InvalidFamilyOperationException("Family '" + descriptor.getNameAsString()
2317 + "' does not exist, so it cannot be modified");
2320 return TableDescriptorBuilder.newBuilder(old).modifyColumnFamily(descriptor).build();
2322 }, nonceGroup, nonce, true);
2325 @Override
2326 public long deleteColumn(final TableName tableName, final byte[] columnName,
2327 final long nonceGroup, final long nonce) throws IOException {
2328 checkInitialized();
2329 checkTableExists(tableName);
2331 return modifyTable(tableName, new TableDescriptorGetter() {
2333 @Override
2334 public TableDescriptor get() throws IOException {
2335 TableDescriptor old = getTableDescriptors().get(tableName);
2337 if (!old.hasColumnFamily(columnName)) {
2338 throw new InvalidFamilyOperationException("Family '" + Bytes.toString(columnName)
2339 + "' does not exist, so it cannot be deleted");
2341 if (old.getColumnFamilyCount() == 1) {
2342 throw new InvalidFamilyOperationException("Family '" + Bytes.toString(columnName)
2343 + "' is the only column family in the table, so it cannot be deleted");
2345 return TableDescriptorBuilder.newBuilder(old).removeColumnFamily(columnName).build();
2347 }, nonceGroup, nonce, true);
2350 @Override
2351 public long enableTable(final TableName tableName, final long nonceGroup, final long nonce)
2352 throws IOException {
2353 checkInitialized();
2355 return MasterProcedureUtil.submitProcedure(
2356 new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2357 @Override
2358 protected void run() throws IOException {
2359 getMaster().getMasterCoprocessorHost().preEnableTable(tableName);
2361 // Normally, it would make sense for this authorization check to exist inside
2362 // AccessController, but because the authorization check is done based on internal state
2363 // (rather than explicit permissions) we'll do the check here instead of in the
2364 // coprocessor.
2365 MasterQuotaManager quotaManager = getMasterQuotaManager();
2366 if (quotaManager != null) {
2367 if (quotaManager.isQuotaInitialized()) {
2368 SpaceQuotaSnapshot currSnapshotOfTable =
2369 QuotaTableUtil.getCurrentSnapshotFromQuotaTable(getConnection(), tableName);
2370 if (currSnapshotOfTable != null) {
2371 SpaceQuotaStatus quotaStatus = currSnapshotOfTable.getQuotaStatus();
2372 if (quotaStatus.isInViolation()
2373 && SpaceViolationPolicy.DISABLE == quotaStatus.getPolicy().orElse(null)) {
2374 throw new AccessDeniedException("Enabling the table '" + tableName
2375 + "' is disallowed due to a violated space quota.");
2378 } else if (LOG.isTraceEnabled()) {
2379 LOG.trace("Unable to check for space quotas as the MasterQuotaManager is not enabled");
2383 LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
2385 // Execute the operation asynchronously - client will check the progress of the operation
2386 // In case the request is from a <1.1 client before returning,
2387 // we want to make sure that the table is prepared to be
2388 // enabled (the table is locked and the table state is set).
2389 // Note: if the procedure throws exception, we will catch it and rethrow.
2390 final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createLatch();
2391 submitProcedure(new EnableTableProcedure(procedureExecutor.getEnvironment(),
2392 tableName, prepareLatch));
2393 prepareLatch.await();
2395 getMaster().getMasterCoprocessorHost().postEnableTable(tableName);
2398 @Override
2399 protected String getDescription() {
2400 return "EnableTableProcedure";
2405 @Override
2406 public long disableTable(final TableName tableName, final long nonceGroup, final long nonce)
2407 throws IOException {
2408 checkInitialized();
2410 return MasterProcedureUtil.submitProcedure(
2411 new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2412 @Override
2413 protected void run() throws IOException {
2414 getMaster().getMasterCoprocessorHost().preDisableTable(tableName);
2416 LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
2418 // Execute the operation asynchronously - client will check the progress of the operation
2419 // In case the request is from a <1.1 client before returning,
2420 // we want to make sure that the table is prepared to be
2421 // enabled (the table is locked and the table state is set).
2422 // Note: if the procedure throws exception, we will catch it and rethrow.
2424 // We need to wait for the procedure to potentially fail due to "prepare" sanity
2425 // checks. This will block only the beginning of the procedure. See HBASE-19953.
2426 final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createBlockingLatch();
2427 submitProcedure(new DisableTableProcedure(procedureExecutor.getEnvironment(),
2428 tableName, false, prepareLatch));
2429 prepareLatch.await();
2431 getMaster().getMasterCoprocessorHost().postDisableTable(tableName);
2434 @Override
2435 protected String getDescription() {
2436 return "DisableTableProcedure";
2441 private long modifyTable(final TableName tableName,
2442 final TableDescriptorGetter newDescriptorGetter, final long nonceGroup, final long nonce,
2443 final boolean shouldCheckDescriptor) throws IOException {
2444 return MasterProcedureUtil
2445 .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2446 @Override
2447 protected void run() throws IOException {
2448 TableDescriptor oldDescriptor = getMaster().getTableDescriptors().get(tableName);
2449 TableDescriptor newDescriptor = getMaster().getMasterCoprocessorHost()
2450 .preModifyTable(tableName, oldDescriptor, newDescriptorGetter.get());
2451 TableDescriptorChecker.sanityCheck(conf, newDescriptor);
2452 LOG.info("{} modify table {} from {} to {}", getClientIdAuditPrefix(), tableName,
2453 oldDescriptor, newDescriptor);
2455 // Execute the operation synchronously - wait for the operation completes before
2456 // continuing.
2458 // We need to wait for the procedure to potentially fail due to "prepare" sanity
2459 // checks. This will block only the beginning of the procedure. See HBASE-19953.
2460 ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2461 submitProcedure(new ModifyTableProcedure(procedureExecutor.getEnvironment(),
2462 newDescriptor, latch, oldDescriptor, shouldCheckDescriptor));
2463 latch.await();
2465 getMaster().getMasterCoprocessorHost().postModifyTable(tableName, oldDescriptor,
2466 newDescriptor);
2469 @Override
2470 protected String getDescription() {
2471 return "ModifyTableProcedure";
2477 @Override
2478 public long modifyTable(final TableName tableName, final TableDescriptor newDescriptor,
2479 final long nonceGroup, final long nonce) throws IOException {
2480 checkInitialized();
2481 return modifyTable(tableName, new TableDescriptorGetter() {
2482 @Override
2483 public TableDescriptor get() throws IOException {
2484 return newDescriptor;
2486 }, nonceGroup, nonce, false);
2490 public long restoreSnapshot(final SnapshotDescription snapshotDesc,
2491 final long nonceGroup, final long nonce, final boolean restoreAcl) throws IOException {
2492 checkInitialized();
2493 getSnapshotManager().checkSnapshotSupport();
2495 // Ensure namespace exists. Will throw exception if non-known NS.
2496 final TableName dstTable = TableName.valueOf(snapshotDesc.getTable());
2497 getClusterSchema().getNamespace(dstTable.getNamespaceAsString());
2499 return MasterProcedureUtil.submitProcedure(
2500 new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2501 @Override
2502 protected void run() throws IOException {
2503 setProcId(
2504 getSnapshotManager().restoreOrCloneSnapshot(snapshotDesc, getNonceKey(), restoreAcl));
2507 @Override
2508 protected String getDescription() {
2509 return "RestoreSnapshotProcedure";
2514 private void checkTableExists(final TableName tableName)
2515 throws IOException, TableNotFoundException {
2516 if (!MetaTableAccessor.tableExists(getConnection(), tableName)) {
2517 throw new TableNotFoundException(tableName);
2521 @Override
2522 public void checkTableModifiable(final TableName tableName)
2523 throws IOException, TableNotFoundException, TableNotDisabledException {
2524 if (isCatalogTable(tableName)) {
2525 throw new IOException("Can't modify catalog tables");
2527 checkTableExists(tableName);
2528 TableState ts = getTableStateManager().getTableState(tableName);
2529 if (!ts.isDisabled()) {
2530 throw new TableNotDisabledException("Not DISABLED; " + ts);
2534 public ClusterMetrics getClusterMetricsWithoutCoprocessor() throws InterruptedIOException {
2535 return getClusterMetricsWithoutCoprocessor(EnumSet.allOf(Option.class));
2538 public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> options)
2539 throws InterruptedIOException {
2540 ClusterMetricsBuilder builder = ClusterMetricsBuilder.newBuilder();
2541 // given that hbase1 can't submit the request with Option,
2542 // we return all information to client if the list of Option is empty.
2543 if (options.isEmpty()) {
2544 options = EnumSet.allOf(Option.class);
2547 for (Option opt : options) {
2548 switch (opt) {
2549 case HBASE_VERSION: builder.setHBaseVersion(VersionInfo.getVersion()); break;
2550 case CLUSTER_ID: builder.setClusterId(getClusterId()); break;
2551 case MASTER: builder.setMasterName(getServerName()); break;
2552 case BACKUP_MASTERS: builder.setBackerMasterNames(getBackupMasters()); break;
2553 case LIVE_SERVERS: {
2554 if (serverManager != null) {
2555 builder.setLiveServerMetrics(serverManager.getOnlineServers().entrySet().stream()
2556 .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue())));
2558 break;
2560 case DEAD_SERVERS: {
2561 if (serverManager != null) {
2562 builder.setDeadServerNames(new ArrayList<>(
2563 serverManager.getDeadServers().copyServerNames()));
2565 break;
2567 case MASTER_COPROCESSORS: {
2568 if (cpHost != null) {
2569 builder.setMasterCoprocessorNames(Arrays.asList(getMasterCoprocessors()));
2571 break;
2573 case REGIONS_IN_TRANSITION: {
2574 if (assignmentManager != null) {
2575 builder.setRegionsInTransition(assignmentManager.getRegionStates()
2576 .getRegionsStateInTransition());
2578 break;
2580 case BALANCER_ON: {
2581 if (loadBalancerTracker != null) {
2582 builder.setBalancerOn(loadBalancerTracker.isBalancerOn());
2584 break;
2586 case MASTER_INFO_PORT: {
2587 if (infoServer != null) {
2588 builder.setMasterInfoPort(infoServer.getPort());
2590 break;
2592 case SERVERS_NAME: {
2593 if (serverManager != null) {
2594 builder.setServerNames(serverManager.getOnlineServersList());
2596 break;
2598 case TABLE_TO_REGIONS_COUNT: {
2599 if (isActiveMaster() && isInitialized() && assignmentManager != null) {
2600 try {
2601 Map<TableName, RegionStatesCount> tableRegionStatesCountMap = new HashMap<>();
2602 Map<String, TableDescriptor> tableDescriptorMap = getTableDescriptors().getAll();
2603 for (TableDescriptor tableDescriptor : tableDescriptorMap.values()) {
2604 TableName tableName = tableDescriptor.getTableName();
2605 RegionStatesCount regionStatesCount = assignmentManager
2606 .getRegionStatesCount(tableName);
2607 tableRegionStatesCountMap.put(tableName, regionStatesCount);
2609 builder.setTableRegionStatesCount(tableRegionStatesCountMap);
2610 } catch (IOException e) {
2611 LOG.error("Error while populating TABLE_TO_REGIONS_COUNT for Cluster Metrics..", e);
2614 break;
2618 return builder.build();
2622 * @return cluster status
2624 public ClusterMetrics getClusterMetrics() throws IOException {
2625 return getClusterMetrics(EnumSet.allOf(Option.class));
2628 public ClusterMetrics getClusterMetrics(EnumSet<Option> options) throws IOException {
2629 if (cpHost != null) {
2630 cpHost.preGetClusterMetrics();
2632 ClusterMetrics status = getClusterMetricsWithoutCoprocessor(options);
2633 if (cpHost != null) {
2634 cpHost.postGetClusterMetrics(status);
2636 return status;
2639 private List<ServerName> getBackupMasters() throws InterruptedIOException {
2640 // Build Set of backup masters from ZK nodes
2641 List<String> backupMasterStrings;
2642 try {
2643 backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
2644 this.zooKeeper.getZNodePaths().backupMasterAddressesZNode);
2645 } catch (KeeperException e) {
2646 LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
2647 backupMasterStrings = null;
2650 List<ServerName> backupMasters = Collections.emptyList();
2651 if (backupMasterStrings != null && !backupMasterStrings.isEmpty()) {
2652 backupMasters = new ArrayList<>(backupMasterStrings.size());
2653 for (String s: backupMasterStrings) {
2654 try {
2655 byte [] bytes;
2656 try {
2657 bytes = ZKUtil.getData(this.zooKeeper, ZNodePaths.joinZNode(
2658 this.zooKeeper.getZNodePaths().backupMasterAddressesZNode, s));
2659 } catch (InterruptedException e) {
2660 throw new InterruptedIOException();
2662 if (bytes != null) {
2663 ServerName sn;
2664 try {
2665 sn = ProtobufUtil.parseServerNameFrom(bytes);
2666 } catch (DeserializationException e) {
2667 LOG.warn("Failed parse, skipping registering backup server", e);
2668 continue;
2670 backupMasters.add(sn);
2672 } catch (KeeperException e) {
2673 LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
2674 "backup servers"), e);
2677 Collections.sort(backupMasters, new Comparator<ServerName>() {
2678 @Override
2679 public int compare(ServerName s1, ServerName s2) {
2680 return s1.getServerName().compareTo(s2.getServerName());
2681 }});
2683 return backupMasters;
2687 * The set of loaded coprocessors is stored in a static set. Since it's
2688 * statically allocated, it does not require that HMaster's cpHost be
2689 * initialized prior to accessing it.
2690 * @return a String representation of the set of names of the loaded coprocessors.
2692 public static String getLoadedCoprocessors() {
2693 return CoprocessorHost.getLoadedCoprocessors().toString();
2697 * @return timestamp in millis when HMaster was started.
2699 public long getMasterStartTime() {
2700 return startcode;
2704 * @return timestamp in millis when HMaster became the active master.
2706 public long getMasterActiveTime() {
2707 return masterActiveTime;
2711 * @return timestamp in millis when HMaster finished becoming the active master
2713 public long getMasterFinishedInitializationTime() {
2714 return masterFinishedInitializationTime;
2717 public int getNumWALFiles() {
2718 return 0;
2721 public ProcedureStore getProcedureStore() {
2722 return procedureStore;
2725 public int getRegionServerInfoPort(final ServerName sn) {
2726 int port = this.serverManager.getInfoPort(sn);
2727 return port == 0 ? conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
2728 HConstants.DEFAULT_REGIONSERVER_INFOPORT) : port;
2731 @Override
2732 public String getRegionServerVersion(ServerName sn) {
2733 // Will return "0.0.0" if the server is not online to prevent move system region to unknown
2734 // version RS.
2735 return this.serverManager.getVersion(sn);
2738 @Override
2739 public void checkIfShouldMoveSystemRegionAsync() {
2740 assignmentManager.checkIfShouldMoveSystemRegionAsync();
2744 * @return array of coprocessor SimpleNames.
2746 public String[] getMasterCoprocessors() {
2747 Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
2748 return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
2751 @Override
2752 public void abort(String reason, Throwable cause) {
2753 if (isAborted() || isStopped()) {
2754 return;
2756 setAbortRequested();
2757 if (cpHost != null) {
2758 // HBASE-4014: dump a list of loaded coprocessors.
2759 LOG.error(HBaseMarkers.FATAL, "Master server abort: loaded coprocessors are: " +
2760 getLoadedCoprocessors());
2762 String msg = "***** ABORTING master " + this + ": " + reason + " *****";
2763 if (cause != null) {
2764 LOG.error(HBaseMarkers.FATAL, msg, cause);
2765 } else {
2766 LOG.error(HBaseMarkers.FATAL, msg);
2769 try {
2770 stopMaster();
2771 } catch (IOException e) {
2772 LOG.error("Exception occurred while stopping master", e);
2776 @Override
2777 public ZKWatcher getZooKeeper() {
2778 return zooKeeper;
2781 @Override
2782 public MasterCoprocessorHost getMasterCoprocessorHost() {
2783 return cpHost;
2786 @Override
2787 public MasterQuotaManager getMasterQuotaManager() {
2788 return quotaManager;
2791 @Override
2792 public ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
2793 return procedureExecutor;
2796 @Override
2797 public ServerName getServerName() {
2798 return this.serverName;
2801 @Override
2802 public AssignmentManager getAssignmentManager() {
2803 return this.assignmentManager;
2806 @Override
2807 public CatalogJanitor getCatalogJanitor() {
2808 return this.catalogJanitorChore;
2811 public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
2812 return rsFatals;
2816 * Shutdown the cluster.
2817 * Master runs a coordinated stop of all RegionServers and then itself.
2819 public void shutdown() throws IOException {
2820 if (cpHost != null) {
2821 cpHost.preShutdown();
2824 // Tell the servermanager cluster shutdown has been called. This makes it so when Master is
2825 // last running server, it'll stop itself. Next, we broadcast the cluster shutdown by setting
2826 // the cluster status as down. RegionServers will notice this change in state and will start
2827 // shutting themselves down. When last has exited, Master can go down.
2828 if (this.serverManager != null) {
2829 this.serverManager.shutdownCluster();
2831 if (this.clusterStatusTracker != null) {
2832 try {
2833 this.clusterStatusTracker.setClusterDown();
2834 } catch (KeeperException e) {
2835 LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
2838 // Stop the procedure executor. Will stop any ongoing assign, unassign, server crash etc.,
2839 // processing so we can go down.
2840 if (this.procedureExecutor != null) {
2841 this.procedureExecutor.stop();
2843 // Shutdown our cluster connection. This will kill any hosted RPCs that might be going on;
2844 // this is what we want especially if the Master is in startup phase doing call outs to
2845 // hbase:meta, etc. when cluster is down. Without ths connection close, we'd have to wait on
2846 // the rpc to timeout.
2847 if (this.asyncClusterConnection != null) {
2848 this.asyncClusterConnection.close();
2852 public void stopMaster() throws IOException {
2853 if (cpHost != null) {
2854 cpHost.preStopMaster();
2856 stop("Stopped by " + Thread.currentThread().getName());
2859 @Override
2860 public void stop(String msg) {
2861 if (!isStopped()) {
2862 super.stop(msg);
2863 if (this.activeMasterManager != null) {
2864 this.activeMasterManager.stop();
2869 @VisibleForTesting
2870 protected void checkServiceStarted() throws ServerNotRunningYetException {
2871 if (!serviceStarted) {
2872 throw new ServerNotRunningYetException("Server is not running yet");
2876 public static class MasterStoppedException extends DoNotRetryIOException {
2877 MasterStoppedException() {
2878 super();
2882 void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException,
2883 MasterNotRunningException, MasterStoppedException {
2884 checkServiceStarted();
2885 if (!isInitialized()) {
2886 throw new PleaseHoldException("Master is initializing");
2888 if (isStopped()) {
2889 throw new MasterStoppedException();
2894 * Report whether this master is currently the active master or not.
2895 * If not active master, we are parked on ZK waiting to become active.
2897 * This method is used for testing.
2899 * @return true if active master, false if not.
2901 @Override
2902 public boolean isActiveMaster() {
2903 return activeMaster;
2907 * Report whether this master has completed with its initialization and is
2908 * ready. If ready, the master is also the active master. A standby master
2909 * is never ready.
2911 * This method is used for testing.
2913 * @return true if master is ready to go, false if not.
2915 @Override
2916 public boolean isInitialized() {
2917 return initialized.isReady();
2921 * Report whether this master is in maintenance mode.
2923 * @return true if master is in maintenanceMode
2925 @Override
2926 public boolean isInMaintenanceMode() {
2927 return maintenanceMode;
2930 @VisibleForTesting
2931 public void setInitialized(boolean isInitialized) {
2932 procedureExecutor.getEnvironment().setEventReady(initialized, isInitialized);
2935 @Override
2936 public ProcedureEvent<?> getInitializedEvent() {
2937 return initialized;
2941 * Compute the average load across all region servers.
2942 * Currently, this uses a very naive computation - just uses the number of
2943 * regions being served, ignoring stats about number of requests.
2944 * @return the average load
2946 public double getAverageLoad() {
2947 if (this.assignmentManager == null) {
2948 return 0;
2951 RegionStates regionStates = this.assignmentManager.getRegionStates();
2952 if (regionStates == null) {
2953 return 0;
2955 return regionStates.getAverageLoad();
2959 * @return the count of region split plans executed
2961 public long getSplitPlanCount() {
2962 return splitPlanCount;
2966 * @return the count of region merge plans executed
2968 public long getMergePlanCount() {
2969 return mergePlanCount;
2972 @Override
2973 public boolean registerService(Service instance) {
2975 * No stacking of instances is allowed for a single service name
2977 Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
2978 String serviceName = CoprocessorRpcUtils.getServiceName(serviceDesc);
2979 if (coprocessorServiceHandlers.containsKey(serviceName)) {
2980 LOG.error("Coprocessor service "+serviceName+
2981 " already registered, rejecting request from "+instance
2983 return false;
2986 coprocessorServiceHandlers.put(serviceName, instance);
2987 if (LOG.isDebugEnabled()) {
2988 LOG.debug("Registered master coprocessor service: service="+serviceName);
2990 return true;
2994 * Utility for constructing an instance of the passed HMaster class.
2995 * @param masterClass
2996 * @return HMaster instance.
2998 public static HMaster constructMaster(Class<? extends HMaster> masterClass,
2999 final Configuration conf) {
3000 try {
3001 Constructor<? extends HMaster> c = masterClass.getConstructor(Configuration.class);
3002 return c.newInstance(conf);
3003 } catch(Exception e) {
3004 Throwable error = e;
3005 if (e instanceof InvocationTargetException &&
3006 ((InvocationTargetException)e).getTargetException() != null) {
3007 error = ((InvocationTargetException)e).getTargetException();
3009 throw new RuntimeException("Failed construction of Master: " + masterClass.toString() + ". "
3010 , error);
3015 * @see org.apache.hadoop.hbase.master.HMasterCommandLine
3017 public static void main(String [] args) {
3018 LOG.info("STARTING service " + HMaster.class.getSimpleName());
3019 VersionInfo.logVersion();
3020 new HMasterCommandLine(HMaster.class).doMain(args);
3023 public HFileCleaner getHFileCleaner() {
3024 return this.hfileCleaner;
3027 public LogCleaner getLogCleaner() {
3028 return this.logCleaner;
3032 * @return the underlying snapshot manager
3034 @Override
3035 public SnapshotManager getSnapshotManager() {
3036 return this.snapshotManager;
3040 * @return the underlying MasterProcedureManagerHost
3042 @Override
3043 public MasterProcedureManagerHost getMasterProcedureManagerHost() {
3044 return mpmHost;
3047 @Override
3048 public ClusterSchema getClusterSchema() {
3049 return this.clusterSchemaService;
3053 * Create a new Namespace.
3054 * @param namespaceDescriptor descriptor for new Namespace
3055 * @param nonceGroup Identifier for the source of the request, a client or process.
3056 * @param nonce A unique identifier for this operation from the client or process identified by
3057 * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3058 * @return procedure id
3060 long createNamespace(final NamespaceDescriptor namespaceDescriptor, final long nonceGroup,
3061 final long nonce) throws IOException {
3062 checkInitialized();
3064 TableName.isLegalNamespaceName(Bytes.toBytes(namespaceDescriptor.getName()));
3066 return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
3067 nonceGroup, nonce) {
3068 @Override
3069 protected void run() throws IOException {
3070 getMaster().getMasterCoprocessorHost().preCreateNamespace(namespaceDescriptor);
3071 // We need to wait for the procedure to potentially fail due to "prepare" sanity
3072 // checks. This will block only the beginning of the procedure. See HBASE-19953.
3073 ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3074 LOG.info(getClientIdAuditPrefix() + " creating " + namespaceDescriptor);
3075 // Execute the operation synchronously - wait for the operation to complete before
3076 // continuing.
3077 setProcId(getClusterSchema().createNamespace(namespaceDescriptor, getNonceKey(), latch));
3078 latch.await();
3079 getMaster().getMasterCoprocessorHost().postCreateNamespace(namespaceDescriptor);
3082 @Override
3083 protected String getDescription() {
3084 return "CreateNamespaceProcedure";
3090 * Modify an existing Namespace.
3091 * @param nonceGroup Identifier for the source of the request, a client or process.
3092 * @param nonce A unique identifier for this operation from the client or process identified by
3093 * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3094 * @return procedure id
3096 long modifyNamespace(final NamespaceDescriptor newNsDescriptor, final long nonceGroup,
3097 final long nonce) throws IOException {
3098 checkInitialized();
3100 TableName.isLegalNamespaceName(Bytes.toBytes(newNsDescriptor.getName()));
3102 return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
3103 nonceGroup, nonce) {
3104 @Override
3105 protected void run() throws IOException {
3106 NamespaceDescriptor oldNsDescriptor = getNamespace(newNsDescriptor.getName());
3107 getMaster().getMasterCoprocessorHost().preModifyNamespace(oldNsDescriptor, newNsDescriptor);
3108 // We need to wait for the procedure to potentially fail due to "prepare" sanity
3109 // checks. This will block only the beginning of the procedure. See HBASE-19953.
3110 ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3111 LOG.info(getClientIdAuditPrefix() + " modify " + newNsDescriptor);
3112 // Execute the operation synchronously - wait for the operation to complete before
3113 // continuing.
3114 setProcId(getClusterSchema().modifyNamespace(newNsDescriptor, getNonceKey(), latch));
3115 latch.await();
3116 getMaster().getMasterCoprocessorHost().postModifyNamespace(oldNsDescriptor,
3117 newNsDescriptor);
3120 @Override
3121 protected String getDescription() {
3122 return "ModifyNamespaceProcedure";
3128 * Delete an existing Namespace. Only empty Namespaces (no tables) can be removed.
3129 * @param nonceGroup Identifier for the source of the request, a client or process.
3130 * @param nonce A unique identifier for this operation from the client or process identified by
3131 * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3132 * @return procedure id
3134 long deleteNamespace(final String name, final long nonceGroup, final long nonce)
3135 throws IOException {
3136 checkInitialized();
3138 return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
3139 nonceGroup, nonce) {
3140 @Override
3141 protected void run() throws IOException {
3142 getMaster().getMasterCoprocessorHost().preDeleteNamespace(name);
3143 LOG.info(getClientIdAuditPrefix() + " delete " + name);
3144 // Execute the operation synchronously - wait for the operation to complete before
3145 // continuing.
3147 // We need to wait for the procedure to potentially fail due to "prepare" sanity
3148 // checks. This will block only the beginning of the procedure. See HBASE-19953.
3149 ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3150 setProcId(submitProcedure(
3151 new DeleteNamespaceProcedure(procedureExecutor.getEnvironment(), name, latch)));
3152 latch.await();
3153 // Will not be invoked in the face of Exception thrown by the Procedure's execution
3154 getMaster().getMasterCoprocessorHost().postDeleteNamespace(name);
3157 @Override
3158 protected String getDescription() {
3159 return "DeleteNamespaceProcedure";
3165 * Get a Namespace
3166 * @param name Name of the Namespace
3167 * @return Namespace descriptor for <code>name</code>
3169 NamespaceDescriptor getNamespace(String name) throws IOException {
3170 checkInitialized();
3171 if (this.cpHost != null) this.cpHost.preGetNamespaceDescriptor(name);
3172 NamespaceDescriptor nsd = this.clusterSchemaService.getNamespace(name);
3173 if (this.cpHost != null) this.cpHost.postGetNamespaceDescriptor(nsd);
3174 return nsd;
3178 * Get all Namespaces
3179 * @return All Namespace descriptors
3181 List<NamespaceDescriptor> getNamespaces() throws IOException {
3182 checkInitialized();
3183 final List<NamespaceDescriptor> nsds = new ArrayList<>();
3184 if (cpHost != null) {
3185 cpHost.preListNamespaceDescriptors(nsds);
3187 nsds.addAll(this.clusterSchemaService.getNamespaces());
3188 if (this.cpHost != null) {
3189 this.cpHost.postListNamespaceDescriptors(nsds);
3191 return nsds;
3195 * List namespace names
3196 * @return All namespace names
3198 public List<String> listNamespaces() throws IOException {
3199 checkInitialized();
3200 List<String> namespaces = new ArrayList<>();
3201 if (cpHost != null) {
3202 cpHost.preListNamespaces(namespaces);
3204 for (NamespaceDescriptor namespace : clusterSchemaService.getNamespaces()) {
3205 namespaces.add(namespace.getName());
3207 if (cpHost != null) {
3208 cpHost.postListNamespaces(namespaces);
3210 return namespaces;
3213 @Override
3214 public List<TableName> listTableNamesByNamespace(String name) throws IOException {
3215 checkInitialized();
3216 return listTableNames(name, null, true);
3219 @Override
3220 public List<TableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
3221 checkInitialized();
3222 return listTableDescriptors(name, null, null, true);
3225 @Override
3226 public boolean abortProcedure(final long procId, final boolean mayInterruptIfRunning)
3227 throws IOException {
3228 if (cpHost != null) {
3229 cpHost.preAbortProcedure(this.procedureExecutor, procId);
3232 final boolean result = this.procedureExecutor.abort(procId, mayInterruptIfRunning);
3234 if (cpHost != null) {
3235 cpHost.postAbortProcedure();
3238 return result;
3241 @Override
3242 public List<Procedure<?>> getProcedures() throws IOException {
3243 if (cpHost != null) {
3244 cpHost.preGetProcedures();
3247 @SuppressWarnings({ "unchecked", "rawtypes" })
3248 List<Procedure<?>> procList = (List) this.procedureExecutor.getProcedures();
3250 if (cpHost != null) {
3251 cpHost.postGetProcedures(procList);
3254 return procList;
3257 @Override
3258 public List<LockedResource> getLocks() throws IOException {
3259 if (cpHost != null) {
3260 cpHost.preGetLocks();
3263 MasterProcedureScheduler procedureScheduler =
3264 procedureExecutor.getEnvironment().getProcedureScheduler();
3266 final List<LockedResource> lockedResources = procedureScheduler.getLocks();
3268 if (cpHost != null) {
3269 cpHost.postGetLocks(lockedResources);
3272 return lockedResources;
3276 * Returns the list of table descriptors that match the specified request
3277 * @param namespace the namespace to query, or null if querying for all
3278 * @param regex The regular expression to match against, or null if querying for all
3279 * @param tableNameList the list of table names, or null if querying for all
3280 * @param includeSysTables False to match only against userspace tables
3281 * @return the list of table descriptors
3283 public List<TableDescriptor> listTableDescriptors(final String namespace, final String regex,
3284 final List<TableName> tableNameList, final boolean includeSysTables)
3285 throws IOException {
3286 List<TableDescriptor> htds = new ArrayList<>();
3287 if (cpHost != null) {
3288 cpHost.preGetTableDescriptors(tableNameList, htds, regex);
3290 htds = getTableDescriptors(htds, namespace, regex, tableNameList, includeSysTables);
3291 if (cpHost != null) {
3292 cpHost.postGetTableDescriptors(tableNameList, htds, regex);
3294 return htds;
3298 * Returns the list of table names that match the specified request
3299 * @param regex The regular expression to match against, or null if querying for all
3300 * @param namespace the namespace to query, or null if querying for all
3301 * @param includeSysTables False to match only against userspace tables
3302 * @return the list of table names
3304 public List<TableName> listTableNames(final String namespace, final String regex,
3305 final boolean includeSysTables) throws IOException {
3306 List<TableDescriptor> htds = new ArrayList<>();
3307 if (cpHost != null) {
3308 cpHost.preGetTableNames(htds, regex);
3310 htds = getTableDescriptors(htds, namespace, regex, null, includeSysTables);
3311 if (cpHost != null) {
3312 cpHost.postGetTableNames(htds, regex);
3314 List<TableName> result = new ArrayList<>(htds.size());
3315 for (TableDescriptor htd: htds) result.add(htd.getTableName());
3316 return result;
3320 * @return list of table table descriptors after filtering by regex and whether to include system
3321 * tables, etc.
3322 * @throws IOException
3324 private List<TableDescriptor> getTableDescriptors(final List<TableDescriptor> htds,
3325 final String namespace, final String regex, final List<TableName> tableNameList,
3326 final boolean includeSysTables)
3327 throws IOException {
3328 if (tableNameList == null || tableNameList.isEmpty()) {
3329 // request for all TableDescriptors
3330 Collection<TableDescriptor> allHtds;
3331 if (namespace != null && namespace.length() > 0) {
3332 // Do a check on the namespace existence. Will fail if does not exist.
3333 this.clusterSchemaService.getNamespace(namespace);
3334 allHtds = tableDescriptors.getByNamespace(namespace).values();
3335 } else {
3336 allHtds = tableDescriptors.getAll().values();
3338 for (TableDescriptor desc: allHtds) {
3339 if (tableStateManager.isTablePresent(desc.getTableName())
3340 && (includeSysTables || !desc.getTableName().isSystemTable())) {
3341 htds.add(desc);
3344 } else {
3345 for (TableName s: tableNameList) {
3346 if (tableStateManager.isTablePresent(s)) {
3347 TableDescriptor desc = tableDescriptors.get(s);
3348 if (desc != null) {
3349 htds.add(desc);
3355 // Retains only those matched by regular expression.
3356 if (regex != null) filterTablesByRegex(htds, Pattern.compile(regex));
3357 return htds;
3361 * Removes the table descriptors that don't match the pattern.
3362 * @param descriptors list of table descriptors to filter
3363 * @param pattern the regex to use
3365 private static void filterTablesByRegex(final Collection<TableDescriptor> descriptors,
3366 final Pattern pattern) {
3367 final String defaultNS = NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR;
3368 Iterator<TableDescriptor> itr = descriptors.iterator();
3369 while (itr.hasNext()) {
3370 TableDescriptor htd = itr.next();
3371 String tableName = htd.getTableName().getNameAsString();
3372 boolean matched = pattern.matcher(tableName).matches();
3373 if (!matched && htd.getTableName().getNamespaceAsString().equals(defaultNS)) {
3374 matched = pattern.matcher(defaultNS + TableName.NAMESPACE_DELIM + tableName).matches();
3376 if (!matched) {
3377 itr.remove();
3382 @Override
3383 public long getLastMajorCompactionTimestamp(TableName table) throws IOException {
3384 return getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
3385 .getLastMajorCompactionTimestamp(table);
3388 @Override
3389 public long getLastMajorCompactionTimestampForRegion(byte[] regionName) throws IOException {
3390 return getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
3391 .getLastMajorCompactionTimestamp(regionName);
3395 * Gets the mob file compaction state for a specific table.
3396 * Whether all the mob files are selected is known during the compaction execution, but
3397 * the statistic is done just before compaction starts, it is hard to know the compaction
3398 * type at that time, so the rough statistics are chosen for the mob file compaction. Only two
3399 * compaction states are available, CompactionState.MAJOR_AND_MINOR and CompactionState.NONE.
3400 * @param tableName The current table name.
3401 * @return If a given table is in mob file compaction now.
3403 public CompactionState getMobCompactionState(TableName tableName) {
3404 AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3405 if (compactionsCount != null && compactionsCount.get() != 0) {
3406 return CompactionState.MAJOR_AND_MINOR;
3408 return CompactionState.NONE;
3411 public void reportMobCompactionStart(TableName tableName) throws IOException {
3412 IdLock.Entry lockEntry = null;
3413 try {
3414 lockEntry = mobCompactionLock.getLockEntry(tableName.hashCode());
3415 AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3416 if (compactionsCount == null) {
3417 compactionsCount = new AtomicInteger(0);
3418 mobCompactionStates.put(tableName, compactionsCount);
3420 compactionsCount.incrementAndGet();
3421 } finally {
3422 if (lockEntry != null) {
3423 mobCompactionLock.releaseLockEntry(lockEntry);
3428 public void reportMobCompactionEnd(TableName tableName) throws IOException {
3429 IdLock.Entry lockEntry = null;
3430 try {
3431 lockEntry = mobCompactionLock.getLockEntry(tableName.hashCode());
3432 AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3433 if (compactionsCount != null) {
3434 int count = compactionsCount.decrementAndGet();
3435 // remove the entry if the count is 0.
3436 if (count == 0) {
3437 mobCompactionStates.remove(tableName);
3440 } finally {
3441 if (lockEntry != null) {
3442 mobCompactionLock.releaseLockEntry(lockEntry);
3448 * Requests mob compaction.
3449 * @param tableName The table the compact.
3450 * @param columns The compacted columns.
3451 * @param allFiles Whether add all mob files into the compaction.
3453 public void requestMobCompaction(TableName tableName,
3454 List<ColumnFamilyDescriptor> columns, boolean allFiles) throws IOException {
3455 mobCompactThread.requestMobCompaction(conf, getFileSystem(), tableName, columns, allFiles);
3459 * Queries the state of the {@link LoadBalancerTracker}. If the balancer is not initialized,
3460 * false is returned.
3462 * @return The state of the load balancer, or false if the load balancer isn't defined.
3464 public boolean isBalancerOn() {
3465 return !isInMaintenanceMode()
3466 && loadBalancerTracker != null
3467 && loadBalancerTracker.isBalancerOn();
3471 * Queries the state of the {@link RegionNormalizerTracker}. If it's not initialized,
3472 * false is returned.
3474 public boolean isNormalizerOn() {
3475 return !isInMaintenanceMode()
3476 && regionNormalizerTracker != null
3477 && regionNormalizerTracker.isNormalizerOn();
3481 * Queries the state of the {@link SplitOrMergeTracker}. If it is not initialized,
3482 * false is returned. If switchType is illegal, false will return.
3483 * @param switchType see {@link org.apache.hadoop.hbase.client.MasterSwitchType}
3484 * @return The state of the switch
3486 @Override
3487 public boolean isSplitOrMergeEnabled(MasterSwitchType switchType) {
3488 return !isInMaintenanceMode()
3489 && splitOrMergeTracker != null
3490 && splitOrMergeTracker.isSplitOrMergeEnabled(switchType);
3494 * Fetch the configured {@link LoadBalancer} class name. If none is set, a default is returned.
3496 * @return The name of the {@link LoadBalancer} in use.
3498 public String getLoadBalancerClassName() {
3499 return conf.get(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, LoadBalancerFactory
3500 .getDefaultLoadBalancerClass().getName());
3504 * @return RegionNormalizerTracker instance
3506 public RegionNormalizerTracker getRegionNormalizerTracker() {
3507 return regionNormalizerTracker;
3510 public SplitOrMergeTracker getSplitOrMergeTracker() {
3511 return splitOrMergeTracker;
3514 @Override
3515 public LoadBalancer getLoadBalancer() {
3516 return balancer;
3519 @Override
3520 public FavoredNodesManager getFavoredNodesManager() {
3521 return favoredNodesManager;
3524 private long executePeerProcedure(AbstractPeerProcedure<?> procedure) throws IOException {
3525 long procId = procedureExecutor.submitProcedure(procedure);
3526 procedure.getLatch().await();
3527 return procId;
3530 @Override
3531 public long addReplicationPeer(String peerId, ReplicationPeerConfig peerConfig, boolean enabled)
3532 throws ReplicationException, IOException {
3533 LOG.info(getClientIdAuditPrefix() + " creating replication peer, id=" + peerId + ", config=" +
3534 peerConfig + ", state=" + (enabled ? "ENABLED" : "DISABLED"));
3535 return executePeerProcedure(new AddPeerProcedure(peerId, peerConfig, enabled));
3538 @Override
3539 public long removeReplicationPeer(String peerId) throws ReplicationException, IOException {
3540 LOG.info(getClientIdAuditPrefix() + " removing replication peer, id=" + peerId);
3541 return executePeerProcedure(new RemovePeerProcedure(peerId));
3544 @Override
3545 public long enableReplicationPeer(String peerId) throws ReplicationException, IOException {
3546 LOG.info(getClientIdAuditPrefix() + " enable replication peer, id=" + peerId);
3547 return executePeerProcedure(new EnablePeerProcedure(peerId));
3550 @Override
3551 public long disableReplicationPeer(String peerId) throws ReplicationException, IOException {
3552 LOG.info(getClientIdAuditPrefix() + " disable replication peer, id=" + peerId);
3553 return executePeerProcedure(new DisablePeerProcedure(peerId));
3556 @Override
3557 public ReplicationPeerConfig getReplicationPeerConfig(String peerId)
3558 throws ReplicationException, IOException {
3559 if (cpHost != null) {
3560 cpHost.preGetReplicationPeerConfig(peerId);
3562 LOG.info(getClientIdAuditPrefix() + " get replication peer config, id=" + peerId);
3563 ReplicationPeerConfig peerConfig = this.replicationPeerManager.getPeerConfig(peerId)
3564 .orElseThrow(() -> new ReplicationPeerNotFoundException(peerId));
3565 if (cpHost != null) {
3566 cpHost.postGetReplicationPeerConfig(peerId);
3568 return peerConfig;
3571 @Override
3572 public long updateReplicationPeerConfig(String peerId, ReplicationPeerConfig peerConfig)
3573 throws ReplicationException, IOException {
3574 LOG.info(getClientIdAuditPrefix() + " update replication peer config, id=" + peerId +
3575 ", config=" + peerConfig);
3576 return executePeerProcedure(new UpdatePeerConfigProcedure(peerId, peerConfig));
3579 @Override
3580 public List<ReplicationPeerDescription> listReplicationPeers(String regex)
3581 throws ReplicationException, IOException {
3582 if (cpHost != null) {
3583 cpHost.preListReplicationPeers(regex);
3585 LOG.debug("{} list replication peers, regex={}", getClientIdAuditPrefix(), regex);
3586 Pattern pattern = regex == null ? null : Pattern.compile(regex);
3587 List<ReplicationPeerDescription> peers =
3588 this.replicationPeerManager.listPeers(pattern);
3589 if (cpHost != null) {
3590 cpHost.postListReplicationPeers(regex);
3592 return peers;
3595 @Override
3596 public long transitReplicationPeerSyncReplicationState(String peerId, SyncReplicationState state)
3597 throws ReplicationException, IOException {
3598 LOG.info(
3599 getClientIdAuditPrefix() +
3600 " transit current cluster state to {} in a synchronous replication peer id={}",
3601 state, peerId);
3602 return executePeerProcedure(new TransitPeerSyncReplicationStateProcedure(peerId, state));
3606 * Mark region server(s) as decommissioned (previously called 'draining') to prevent additional
3607 * regions from getting assigned to them. Also unload the regions on the servers asynchronously.0
3608 * @param servers Region servers to decommission.
3610 public void decommissionRegionServers(final List<ServerName> servers, final boolean offload)
3611 throws HBaseIOException {
3612 List<ServerName> serversAdded = new ArrayList<>(servers.size());
3613 // Place the decommission marker first.
3614 String parentZnode = getZooKeeper().getZNodePaths().drainingZNode;
3615 for (ServerName server : servers) {
3616 try {
3617 String node = ZNodePaths.joinZNode(parentZnode, server.getServerName());
3618 ZKUtil.createAndFailSilent(getZooKeeper(), node);
3619 } catch (KeeperException ke) {
3620 throw new HBaseIOException(
3621 this.zooKeeper.prefix("Unable to decommission '" + server.getServerName() + "'."), ke);
3623 if (this.serverManager.addServerToDrainList(server)) {
3624 serversAdded.add(server);
3627 // Move the regions off the decommissioned servers.
3628 if (offload) {
3629 final List<ServerName> destServers = this.serverManager.createDestinationServersList();
3630 for (ServerName server : serversAdded) {
3631 final List<RegionInfo> regionsOnServer = this.assignmentManager.getRegionsOnServer(server);
3632 for (RegionInfo hri : regionsOnServer) {
3633 ServerName dest = balancer.randomAssignment(hri, destServers);
3634 if (dest == null) {
3635 throw new HBaseIOException("Unable to determine a plan to move " + hri);
3637 RegionPlan rp = new RegionPlan(hri, server, dest);
3638 this.assignmentManager.moveAsync(rp);
3645 * List region servers marked as decommissioned (previously called 'draining') to not get regions
3646 * assigned to them.
3647 * @return List of decommissioned servers.
3649 public List<ServerName> listDecommissionedRegionServers() {
3650 return this.serverManager.getDrainingServersList();
3654 * Remove decommission marker (previously called 'draining') from a region server to allow regions
3655 * assignments. Load regions onto the server asynchronously if a list of regions is given
3656 * @param server Region server to remove decommission marker from.
3658 public void recommissionRegionServer(final ServerName server,
3659 final List<byte[]> encodedRegionNames) throws IOException {
3660 // Remove the server from decommissioned (draining) server list.
3661 String parentZnode = getZooKeeper().getZNodePaths().drainingZNode;
3662 String node = ZNodePaths.joinZNode(parentZnode, server.getServerName());
3663 try {
3664 ZKUtil.deleteNodeFailSilent(getZooKeeper(), node);
3665 } catch (KeeperException ke) {
3666 throw new HBaseIOException(
3667 this.zooKeeper.prefix("Unable to recommission '" + server.getServerName() + "'."), ke);
3669 this.serverManager.removeServerFromDrainList(server);
3671 // Load the regions onto the server if we are given a list of regions.
3672 if (encodedRegionNames == null || encodedRegionNames.isEmpty()) {
3673 return;
3675 if (!this.serverManager.isServerOnline(server)) {
3676 return;
3678 for (byte[] encodedRegionName : encodedRegionNames) {
3679 RegionState regionState =
3680 assignmentManager.getRegionStates().getRegionState(Bytes.toString(encodedRegionName));
3681 if (regionState == null) {
3682 LOG.warn("Unknown region " + Bytes.toStringBinary(encodedRegionName));
3683 continue;
3685 RegionInfo hri = regionState.getRegion();
3686 if (server.equals(regionState.getServerName())) {
3687 LOG.info("Skipping move of region " + hri.getRegionNameAsString() +
3688 " because region already assigned to the same server " + server + ".");
3689 continue;
3691 RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), server);
3692 this.assignmentManager.moveAsync(rp);
3696 @Override
3697 public LockManager getLockManager() {
3698 return lockManager;
3701 public QuotaObserverChore getQuotaObserverChore() {
3702 return this.quotaObserverChore;
3705 public SpaceQuotaSnapshotNotifier getSpaceQuotaSnapshotNotifier() {
3706 return this.spaceQuotaSnapshotNotifier;
3709 @SuppressWarnings("unchecked")
3710 private RemoteProcedure<MasterProcedureEnv, ?> getRemoteProcedure(long procId) {
3711 Procedure<?> procedure = procedureExecutor.getProcedure(procId);
3712 if (procedure == null) {
3713 return null;
3715 assert procedure instanceof RemoteProcedure;
3716 return (RemoteProcedure<MasterProcedureEnv, ?>) procedure;
3719 public void remoteProcedureCompleted(long procId) {
3720 LOG.debug("Remote procedure done, pid={}", procId);
3721 RemoteProcedure<MasterProcedureEnv, ?> procedure = getRemoteProcedure(procId);
3722 if (procedure != null) {
3723 procedure.remoteOperationCompleted(procedureExecutor.getEnvironment());
3727 public void remoteProcedureFailed(long procId, RemoteProcedureException error) {
3728 LOG.debug("Remote procedure failed, pid={}", procId, error);
3729 RemoteProcedure<MasterProcedureEnv, ?> procedure = getRemoteProcedure(procId);
3730 if (procedure != null) {
3731 procedure.remoteOperationFailed(procedureExecutor.getEnvironment(), error);
3736 * Reopen regions provided in the argument
3738 * @param tableName The current table name
3739 * @param regionNames The region names of the regions to reopen
3740 * @param nonceGroup Identifier for the source of the request, a client or process
3741 * @param nonce A unique identifier for this operation from the client or process identified by
3742 * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3743 * @return procedure Id
3744 * @throws IOException if reopening region fails while running procedure
3746 long reopenRegions(final TableName tableName, final List<byte[]> regionNames,
3747 final long nonceGroup, final long nonce)
3748 throws IOException {
3750 return MasterProcedureUtil
3751 .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
3753 @Override
3754 protected void run() throws IOException {
3755 submitProcedure(new ReopenTableRegionsProcedure(tableName, regionNames));
3758 @Override
3759 protected String getDescription() {
3760 return "ReopenTableRegionsProcedure";
3767 @Override
3768 public ReplicationPeerManager getReplicationPeerManager() {
3769 return replicationPeerManager;
3772 public HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>>
3773 getReplicationLoad(ServerName[] serverNames) {
3774 List<ReplicationPeerDescription> peerList = this.getReplicationPeerManager().listPeers(null);
3775 if (peerList == null) {
3776 return null;
3778 HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>> replicationLoadSourceMap =
3779 new HashMap<>(peerList.size());
3780 peerList.stream()
3781 .forEach(peer -> replicationLoadSourceMap.put(peer.getPeerId(), new ArrayList<>()));
3782 for (ServerName serverName : serverNames) {
3783 List<ReplicationLoadSource> replicationLoadSources =
3784 getServerManager().getLoad(serverName).getReplicationLoadSourceList();
3785 for (ReplicationLoadSource replicationLoadSource : replicationLoadSources) {
3786 replicationLoadSourceMap.get(replicationLoadSource.getPeerID())
3787 .add(new Pair<>(serverName, replicationLoadSource));
3790 for (List<Pair<ServerName, ReplicationLoadSource>> loads : replicationLoadSourceMap.values()) {
3791 if (loads.size() > 0) {
3792 loads.sort(Comparator.comparingLong(load -> (-1) * load.getSecond().getReplicationLag()));
3795 return replicationLoadSourceMap;
3799 * This method modifies the master's configuration in order to inject replication-related features
3801 @VisibleForTesting
3802 public static void decorateMasterConfiguration(Configuration conf) {
3803 String plugins = conf.get(HBASE_MASTER_LOGCLEANER_PLUGINS);
3804 String cleanerClass = ReplicationLogCleaner.class.getCanonicalName();
3805 if (!plugins.contains(cleanerClass)) {
3806 conf.set(HBASE_MASTER_LOGCLEANER_PLUGINS, plugins + "," + cleanerClass);
3808 if (ReplicationUtils.isReplicationForBulkLoadDataEnabled(conf)) {
3809 plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
3810 cleanerClass = ReplicationHFileCleaner.class.getCanonicalName();
3811 if (!plugins.contains(cleanerClass)) {
3812 conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, plugins + "," + cleanerClass);
3817 public SnapshotQuotaObserverChore getSnapshotQuotaObserverChore() {
3818 return this.snapshotQuotaChore;
3821 @Override
3822 public SyncReplicationReplayWALManager getSyncReplicationReplayWALManager() {
3823 return this.syncReplicationReplayWALManager;
3826 @Override
3827 public Map<String, ReplicationStatus> getWalGroupsReplicationStatus() {
3828 if (!this.isOnline() || !LoadBalancer.isMasterCanHostUserRegions(conf)) {
3829 return new HashMap<>();
3831 return super.getWalGroupsReplicationStatus();
3834 public HbckChore getHbckChore() {
3835 return this.hbckChore;
3838 @Override
3839 public String getClusterId() {
3840 if (activeMaster) {
3841 return super.getClusterId();
3843 return cachedClusterId.getFromCacheOrFetch();
3846 @Override
3847 public void runReplicationBarrierCleaner() {
3848 ReplicationBarrierCleaner rbc = this.replicationBarrierCleaner;
3849 if (rbc != null) {
3850 rbc.chore();