hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

   1 /**
   2  * Licensed to the Apache Software Foundation (ASF) under one
   3  * or more contributor license agreements.  See the NOTICE file
   4  * distributed with this work for additional information
   5  * regarding copyright ownership.  The ASF licenses this file
   6  * to you under the Apache License, Version 2.0 (the
   7  * "License"); you may not use this file except in compliance
   8  * with the License.  You may obtain a copy of the License at
   9  *
  10  *     http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  * Unless required by applicable law or agreed to in writing, software
  13  * distributed under the License is distributed on an "AS IS" BASIS,
  14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  * See the License for the specific language governing permissions and
  16  * limitations under the License.
  17  */
  18 package org.apache.hadoop.hbase.master;
  19
  20 import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK;
  21 import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS;
  22 import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;
  23
  24 import com.google.protobuf.Descriptors;
  25 import com.google.protobuf.Service;
  26 import java.io.IOException;
  27 import java.io.InterruptedIOException;
  28 import java.lang.reflect.Constructor;
  29 import java.lang.reflect.InvocationTargetException;
  30 import java.net.InetAddress;
  31 import java.net.InetSocketAddress;
  32 import java.net.UnknownHostException;
  33 import java.util.ArrayList;
  34 import java.util.Arrays;
  35 import java.util.Collection;
  36 import java.util.Collections;
  37 import java.util.Comparator;
  38 import java.util.EnumSet;
  39 import java.util.HashMap;
  40 import java.util.Iterator;
  41 import java.util.List;
  42 import java.util.Map;
  43 import java.util.Map.Entry;
  44 import java.util.Objects;
  45 import java.util.Optional;
  46 import java.util.Set;
  47 import java.util.concurrent.ExecutionException;
  48 import java.util.concurrent.Future;
  49 import java.util.concurrent.TimeUnit;
  50 import java.util.concurrent.TimeoutException;
  51 import java.util.concurrent.atomic.AtomicInteger;
  52 import java.util.function.Function;
  53 import java.util.regex.Pattern;
  54 import java.util.stream.Collectors;
  55 import javax.servlet.ServletException;
  56 import javax.servlet.http.HttpServlet;
  57 import javax.servlet.http.HttpServletRequest;
  58 import javax.servlet.http.HttpServletResponse;
  59 import org.apache.commons.lang3.StringUtils;
  60 import org.apache.hadoop.conf.Configuration;
  61 import org.apache.hadoop.fs.Path;
  62 import org.apache.hadoop.hbase.ChoreService;
  63 import org.apache.hadoop.hbase.ClusterId;
  64 import org.apache.hadoop.hbase.ClusterMetrics;
  65 import org.apache.hadoop.hbase.ClusterMetrics.Option;
  66 import org.apache.hadoop.hbase.ClusterMetricsBuilder;
  67 import org.apache.hadoop.hbase.DoNotRetryIOException;
  68 import org.apache.hadoop.hbase.HBaseIOException;
  69 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
  70 import org.apache.hadoop.hbase.HConstants;
  71 import org.apache.hadoop.hbase.InvalidFamilyOperationException;
  72 import org.apache.hadoop.hbase.MasterNotRunningException;
  73 import org.apache.hadoop.hbase.MetaTableAccessor;
  74 import org.apache.hadoop.hbase.NamespaceDescriptor;
  75 import org.apache.hadoop.hbase.PleaseHoldException;
  76 import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
  77 import org.apache.hadoop.hbase.ServerName;
  78 import org.apache.hadoop.hbase.TableName;
  79 import org.apache.hadoop.hbase.TableNotDisabledException;
  80 import org.apache.hadoop.hbase.TableNotFoundException;
  81 import org.apache.hadoop.hbase.UnknownRegionException;
  82 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
  83 import org.apache.hadoop.hbase.client.MasterSwitchType;
  84 import org.apache.hadoop.hbase.client.RegionInfo;
  85 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
  86 import org.apache.hadoop.hbase.client.RegionStatesCount;
  87 import org.apache.hadoop.hbase.client.TableDescriptor;
  88 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
  89 import org.apache.hadoop.hbase.client.TableState;
  90 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
  91 import org.apache.hadoop.hbase.exceptions.DeserializationException;
  92 import org.apache.hadoop.hbase.executor.ExecutorType;
  93 import org.apache.hadoop.hbase.favored.FavoredNodesManager;
  94 import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;
  95 import org.apache.hadoop.hbase.http.InfoServer;
  96 import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
  97 import org.apache.hadoop.hbase.ipc.RpcServer;
  98 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
  99 import org.apache.hadoop.hbase.log.HBaseMarkers;
 100 import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
 101 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
 102 import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
 103 import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
 104 import org.apache.hadoop.hbase.master.assignment.RegionStates;
 105 import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
 106 import org.apache.hadoop.hbase.master.balancer.BalancerChore;
 107 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
 108 import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
 109 import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
 110 import org.apache.hadoop.hbase.master.cleaner.DirScanPool;
 111 import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
 112 import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
 113 import org.apache.hadoop.hbase.master.cleaner.ReplicationBarrierCleaner;
 114 import org.apache.hadoop.hbase.master.cleaner.SnapshotCleanerChore;
 115 import org.apache.hadoop.hbase.master.locking.LockManager;
 116 import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
 117 import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan.PlanType;
 118 import org.apache.hadoop.hbase.master.normalizer.RegionNormalizer;
 119 import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerChore;
 120 import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerFactory;
 121 import org.apache.hadoop.hbase.master.procedure.CreateTableProcedure;
 122 import org.apache.hadoop.hbase.master.procedure.DeleteNamespaceProcedure;
 123 import org.apache.hadoop.hbase.master.procedure.DeleteTableProcedure;
 124 import org.apache.hadoop.hbase.master.procedure.DisableTableProcedure;
 125 import org.apache.hadoop.hbase.master.procedure.EnableTableProcedure;
 126 import org.apache.hadoop.hbase.master.procedure.InitMetaProcedure;
 127 import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
 128 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 129 import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler;
 130 import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
 131 import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.NonceProcedureRunnable;
 132 import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
 133 import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
 134 import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
 135 import org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure;
 136 import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
 137 import org.apache.hadoop.hbase.master.procedure.TruncateTableProcedure;
 138 import org.apache.hadoop.hbase.master.replication.AbstractPeerProcedure;
 139 import org.apache.hadoop.hbase.master.replication.AddPeerProcedure;
 140 import org.apache.hadoop.hbase.master.replication.DisablePeerProcedure;
 141 import org.apache.hadoop.hbase.master.replication.EnablePeerProcedure;
 142 import org.apache.hadoop.hbase.master.replication.RemovePeerProcedure;
 143 import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
 144 import org.apache.hadoop.hbase.master.replication.SyncReplicationReplayWALManager;
 145 import org.apache.hadoop.hbase.master.replication.TransitPeerSyncReplicationStateProcedure;
 146 import org.apache.hadoop.hbase.master.replication.UpdatePeerConfigProcedure;
 147 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
 148 import org.apache.hadoop.hbase.master.zksyncer.MasterAddressSyncer;
 149 import org.apache.hadoop.hbase.master.zksyncer.MetaLocationSyncer;
 150 import org.apache.hadoop.hbase.mob.MobConstants;
 151 import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
 152 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 153 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 154 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
 155 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
 156 import org.apache.hadoop.hbase.procedure2.LockedResource;
 157 import org.apache.hadoop.hbase.procedure2.Procedure;
 158 import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
 159 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 160 import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure;
 161 import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
 162 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
 163 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureStoreListener;
 164 import org.apache.hadoop.hbase.procedure2.store.region.RegionProcedureStore;
 165 import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
 166 import org.apache.hadoop.hbase.quotas.MasterQuotasObserver;
 167 import org.apache.hadoop.hbase.quotas.QuotaObserverChore;
 168 import org.apache.hadoop.hbase.quotas.QuotaTableUtil;
 169 import org.apache.hadoop.hbase.quotas.QuotaUtil;
 170 import org.apache.hadoop.hbase.quotas.SnapshotQuotaObserverChore;
 171 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
 172 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot.SpaceQuotaStatus;
 173 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier;
 174 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
 175 import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
 176 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 177 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
 178 import org.apache.hadoop.hbase.replication.ReplicationException;
 179 import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
 180 import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
 181 import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
 182 import org.apache.hadoop.hbase.replication.ReplicationUtils;
 183 import org.apache.hadoop.hbase.replication.SyncReplicationState;
 184 import org.apache.hadoop.hbase.replication.master.ReplicationHFileCleaner;
 185 import org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner;
 186 import org.apache.hadoop.hbase.replication.master.ReplicationPeerConfigUpgrader;
 187 import org.apache.hadoop.hbase.replication.regionserver.ReplicationStatus;
 188 import org.apache.hadoop.hbase.security.AccessDeniedException;
 189 import org.apache.hadoop.hbase.security.SecurityConstants;
 190 import org.apache.hadoop.hbase.security.UserProvider;
 191 import org.apache.hadoop.hbase.trace.TraceUtil;
 192 import org.apache.hadoop.hbase.util.Addressing;
 193 import org.apache.hadoop.hbase.util.Bytes;
 194 import org.apache.hadoop.hbase.util.FutureUtils;
 195 import org.apache.hadoop.hbase.util.HBaseFsck;
 196 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 197 import org.apache.hadoop.hbase.util.HasThread;
 198 import org.apache.hadoop.hbase.util.IdLock;
 199 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
 200 import org.apache.hadoop.hbase.util.Pair;
 201 import org.apache.hadoop.hbase.util.RetryCounter;
 202 import org.apache.hadoop.hbase.util.RetryCounterFactory;
 203 import org.apache.hadoop.hbase.util.TableDescriptorChecker;
 204 import org.apache.hadoop.hbase.util.Threads;
 205 import org.apache.hadoop.hbase.util.VersionInfo;
 206 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
 207 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
 208 import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker;
 209 import org.apache.hadoop.hbase.zookeeper.SnapshotCleanupTracker;
 210 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
 211 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 212 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
 213 import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
 214 import org.apache.yetus.audience.InterfaceAudience;
 215 import org.apache.zookeeper.KeeperException;
 216 import org.eclipse.jetty.server.Server;
 217 import org.eclipse.jetty.server.ServerConnector;
 218 import org.eclipse.jetty.servlet.ServletHolder;
 219 import org.eclipse.jetty.webapp.WebAppContext;
 220 import org.slf4j.Logger;
 221 import org.slf4j.LoggerFactory;
 222
 223 import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
 224 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
 225 import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
 226
 227 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 228 import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
 229 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
 230 import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
 231
 232 /**
 233  * HMaster is the "master server" for HBase. An HBase cluster has one active
 234  * master.  If many masters are started, all compete.  Whichever wins goes on to
 235  * run the cluster.  All others park themselves in their constructor until
 236  * master or cluster shutdown or until the active master loses its lease in
 * zookeeper.  Thereafter, all running masters jostle to take over the master role.
 238  *
 * <p>The Master can be asked to shut down the cluster. See {@link #shutdown()}.  In
 240  * this case it will tell all regionservers to go down and then wait on them
 241  * all reporting in that they are down.  This master will then shut itself down.
 242  *
 243  * <p>You can also shutdown just this master.  Call {@link #stopMaster()}.
 244  *
 245  * @see org.apache.zookeeper.Watcher
 246  */
 247 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
 248 @SuppressWarnings("deprecation")
 249 public class HMaster extends HRegionServer implements MasterServices {
 250   private static Logger LOG = LoggerFactory.getLogger(HMaster.class);
 251
 252   /**
 253    * Protection against zombie master. Started once Master accepts active responsibility and
 254    * starts taking over responsibilities. Allows a finite time window before giving up ownership.
 255    */
 256   private static class InitializationMonitor extends HasThread {
 257     /** The amount of time in milliseconds to sleep before checking initialization status. */
 258     public static final String TIMEOUT_KEY = "hbase.master.initializationmonitor.timeout";
 259     public static final long TIMEOUT_DEFAULT = TimeUnit.MILLISECONDS.convert(15, TimeUnit.MINUTES);
 260
 261     /**
 262      * When timeout expired and initialization has not complete, call {@link System#exit(int)} when
 263      * true, do nothing otherwise.
 264      */
 265     public static final String HALT_KEY = "hbase.master.initializationmonitor.haltontimeout";
 266     public static final boolean HALT_DEFAULT = false;
 267
 268     private final HMaster master;
 269     private final long timeout;
 270     private final boolean haltOnTimeout;
 271
 272     /** Creates a Thread that monitors the {@link #isInitialized()} state. */
 273     InitializationMonitor(HMaster master) {
 274       super("MasterInitializationMonitor");
 275       this.master = master;
 276       this.timeout = master.getConfiguration().getLong(TIMEOUT_KEY, TIMEOUT_DEFAULT);
 277       this.haltOnTimeout = master.getConfiguration().getBoolean(HALT_KEY, HALT_DEFAULT);
 278       this.setDaemon(true);
 279     }
 280
 281     @Override
 282     public void run() {
 283       try {
 284         while (!master.isStopped() && master.isActiveMaster()) {
 285           Thread.sleep(timeout);
 286           if (master.isInitialized()) {
 287             LOG.debug("Initialization completed within allotted tolerance. Monitor exiting.");
 288           } else {
 289             LOG.error("Master failed to complete initialization after " + timeout + "ms. Please"
 290                 + " consider submitting a bug report including a thread dump of this process.");
 291             if (haltOnTimeout) {
 292               LOG.error("Zombie Master exiting. Thread dump to stdout");
 293               Threads.printThreadInfo(System.out, "Zombie HMaster");
 294               System.exit(-1);
 295             }
 296           }
 297         }
 298       } catch (InterruptedException ie) {
 299         LOG.trace("InitMonitor thread interrupted. Existing.");
 300       }
 301     }
 302   }
 303
  // MASTER is the name of the webapp and the attribute name used for stuffing this
  // instance into the web context.
  public static final String MASTER = "master";

  // Manager and zk listener for master election.
  // Left null by the constructor when "hbase.testing.nocluster" is true.
  private final ActiveMasterManager activeMasterManager;
  // Region server tracker
  private RegionServerTracker regionServerTracker;
  // Draining region server tracker
  private DrainingServerTracker drainingServerTracker;
  // Tracker for load balancer state
  LoadBalancerTracker loadBalancerTracker;
  // Tracker for meta location, if any client ZK quorum specified
  MetaLocationSyncer metaLocationSyncer;
  // Tracker for active master location, if any client ZK quorum specified
  MasterAddressSyncer masterAddressSyncer;
  // Tracker for auto snapshot cleanup state
  SnapshotCleanupTracker snapshotCleanupTracker;

  // Tracker for split and merge state
  private SplitOrMergeTracker splitOrMergeTracker;

  // Tracker for region normalizer state
  private RegionNormalizerTracker regionNormalizerTracker;

  // Stopped asynchronously (and then awaited) in the finally block of run().
  private ClusterSchemaService clusterSchemaService;

  // Configuration key, and default in seconds (five minutes), for how long run() waits for
  // clusterSchemaService to terminate on the way out.
  public static final String HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS =
    "hbase.master.wait.on.service.seconds";
  public static final int DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS = 5 * 60;

  // Configuration key for the interval of the master cleaner chores.
  public static final String HBASE_MASTER_CLEANER_INTERVAL = "hbase.master.cleaner.interval";

  // Default cleaner interval: 600 * 1000, presumably milliseconds (ten minutes) -- TODO confirm
  // against the code that reads HBASE_MASTER_CLEANER_INTERVAL.
  public static final int DEFAULT_HBASE_MASTER_CLEANER_INTERVAL = 600 * 1000;

  // Metrics for the HMaster
  final MetricsMaster metricsMaster;
  // file system manager for the master FS operations
  private MasterFileSystem fileSystemManager;
  private MasterWalManager walManager;

  // manager to manage procedure-based WAL splitting, can be null if current
  // is zk-based WAL splitting. SplitWALManager will replace SplitLogManager
  // and MasterWalManager, which means zk-based WAL splitting code will be
  // useless after we switch to the procedure-based one. our eventual goal
  // is to remove all the zk-based WAL splitting code.
  private SplitWALManager splitWALManager;

  // server manager to deal with region server info
  private volatile ServerManager serverManager;

  // manager of assignment nodes in zookeeper
  private AssignmentManager assignmentManager;

  // manager of replication
  private ReplicationPeerManager replicationPeerManager;

  private SyncReplicationReplayWALManager syncReplicationReplayWALManager;

  // buffer for "fatal error" notices from region servers
  // in the cluster. This is only used for assisting
  // operations/debugging.
  // Created in the constructor; its size limit is read from
  // "hbase.master.buffer.for.rs.fatals" (default 1 * 1024 * 1024).
  MemoryBoundedLogMessageBuffer rsFatals;

  // flag set after we become the active master (used for testing).
  // Reset to false in the finally block of run().
  private volatile boolean activeMaster = false;

  // flag set after we complete initialization once active
  private final ProcedureEvent<?> initialized = new ProcedureEvent<>("master initialized");

  // flag set after master services are started,
  // initialization may have not completed yet.
  volatile boolean serviceStarted = false;

  // Maximum time we should run balancer for.
  // NOTE: the field name is misspelled ("Blancing"); it is assigned from getMaxBalancingTime()
  // in the constructor.
  private final int maxBlancingTime;
  // Maximum percent of regions in transition when balancing
  // (HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT).
  private final double maxRitPercent;

  private final LockManager lockManager = new LockManager(this);

  private LoadBalancer balancer;
  private RegionNormalizer normalizer;
  private BalancerChore balancerChore;
  private RegionNormalizerChore normalizerChore;
  private ClusterStatusChore clusterStatusChore;
  // Only created (and scheduled) by the constructor when HConstants.STATUS_PUBLISHED is
  // enabled and a publisher class is available; otherwise stays null.
  private ClusterStatusPublisher clusterStatusPublisherChore = null;
  private SnapshotCleanerChore snapshotCleanerChore = null;

  private HbckChore hbckChore;
  CatalogJanitor catalogJanitorChore;
  private DirScanPool cleanerPool;
  private LogCleaner logCleaner;
  private HFileCleaner hfileCleaner;
  private ReplicationBarrierCleaner replicationBarrierCleaner;
  private ExpiredMobFileCleanerChore expiredMobFileCleanerChore;
  private MobCompactionChore mobCompactChore;
  private MasterMobCompactionThread mobCompactThread;
  // used to synchronize the mobCompactionStates
  private final IdLock mobCompactionLock = new IdLock();
  // save the information of mob compactions in tables.
  // the key is table name, the value is the number of compactions in that table.
  private Map<TableName, AtomicInteger> mobCompactionStates = Maps.newConcurrentMap();

  MasterCoprocessorHost cpHost;

  // Whether to preload table descriptors at startup; read in the constructor from
  // "hbase.master.preload.tabledescriptors" (default true).
  private final boolean preLoadTableDescriptors;

  // Time stamp for when this HMaster became active
  private long masterActiveTime;

  // Time stamp for when HMaster finishes becoming Active Master
  private long masterFinishedInitializationTime;

  Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();

  // monitor for snapshot of hbase tables
  SnapshotManager snapshotManager;
  // monitor for distributed procedures
  private MasterProcedureManagerHost mpmHost;

  private RegionsRecoveryChore regionsRecoveryChore = null;

  private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null;
  // it is assigned after 'initialized' guard set to true, so should be volatile
  private volatile MasterQuotaManager quotaManager;
  private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
  private QuotaObserverChore quotaObserverChore;
  private SnapshotQuotaObserverChore snapshotQuotaChore;

  private ProcedureExecutor<MasterProcedureEnv> procedureExecutor;
  private ProcedureStore procedureStore;

  // handle table states
  private TableStateManager tableStateManager;

  // NOTE(review): presumably running counts of split / merge plans; the code that updates
  // them is not visible here -- confirm before relying on this.
  private long splitPlanCount;
  private long mergePlanCount;

  /* Handle favored nodes information */
  private FavoredNodesManager favoredNodesManager;

  /** jetty server for master to redirect requests to regionserver infoServer */
  private Server masterJettyServer;

  // Determine if we should do normal startup or minimal "single-user" mode with no region
  // servers and no user tables. Useful for repair and recovery of hbase:meta.
  // Set once in the constructor: true when MAINTENANCE_MODE is true in the configuration or,
  // failing that, as a JVM system property (Boolean.getBoolean).
  private final boolean maintenanceMode;
  static final String MAINTENANCE_MODE = "hbase.master.maintenance_mode";

  // Cached clusterId on stand by masters to serve clusterID requests from clients.
  private final CachedClusterId cachedClusterId;
 456
 457   public static class RedirectServlet extends HttpServlet {
 458     private static final long serialVersionUID = 2894774810058302473L;
 459     private final int regionServerInfoPort;
 460     private final String regionServerHostname;
 461
 462     /**
 463      * @param infoServer that we're trying to send all requests to
 464      * @param hostname may be null. if given, will be used for redirects instead of host from client.
 465      */
 466     public RedirectServlet(InfoServer infoServer, String hostname) {
 467        regionServerInfoPort = infoServer.getPort();
 468        regionServerHostname = hostname;
 469     }
 470
 471     @Override
 472     public void doGet(HttpServletRequest request,
 473         HttpServletResponse response) throws ServletException, IOException {
 474       String redirectHost = regionServerHostname;
 475       if(redirectHost == null) {
 476         redirectHost = request.getServerName();
 477         if(!Addressing.isLocalAddress(InetAddress.getByName(redirectHost))) {
 478           LOG.warn("Couldn't resolve '" + redirectHost + "' as an address local to this node and '" +
 479               MASTER_HOSTNAME_KEY + "' is not set; client will get an HTTP 400 response. If " +
 480               "your HBase deployment relies on client accessible names that the region server process " +
 481               "can't resolve locally, then you should set the previously mentioned configuration variable " +
 482               "to an appropriate hostname.");
 483           // no sending client provided input back to the client, so the goal host is just in the logs.
 484           response.sendError(400, "Request was to a host that I can't resolve for any of the network interfaces on " +
 485               "this node. If this is due to an intermediary such as an HTTP load balancer or other proxy, your HBase " +
 486               "administrator can set '" + MASTER_HOSTNAME_KEY + "' to point to the correct hostname.");
 487           return;
 488         }
 489       }
 490       // TODO this scheme should come from looking at the scheme registered in the infoserver's http server for the
 491       // host and port we're using, but it's buried way too deep to do that ATM.
 492       String redirectUrl = request.getScheme() + "://"
 493         + redirectHost + ":" + regionServerInfoPort
 494         + request.getRequestURI();
 495       response.sendRedirect(redirectUrl);
 496     }
 497   }
 498
 499   /**
 500    * Initializes the HMaster. The steps are as follows:
 501    * <p>
 502    * <ol>
 503    * <li>Initialize the local HRegionServer
 504    * <li>Start the ActiveMasterManager.
 505    * </ol>
 506    * <p>
 507    * Remaining steps of initialization occur in
 508    * {@link #finishActiveMasterInitialization(MonitoredTask)} after the master becomes the
 509    * active one.
 510    */
 511   public HMaster(final Configuration conf)
 512       throws IOException, KeeperException {
 513     super(conf);
 514     TraceUtil.initTracer(conf);
 515     try {
 516       if (conf.getBoolean(MAINTENANCE_MODE, false)) {
 517         LOG.info("Detected {}=true via configuration.", MAINTENANCE_MODE);
 518         maintenanceMode = true;
 519       } else if (Boolean.getBoolean(MAINTENANCE_MODE)) {
 520         LOG.info("Detected {}=true via environment variables.", MAINTENANCE_MODE);
 521         maintenanceMode = true;
 522       } else {
 523         maintenanceMode = false;
 524       }
 525
 526       this.rsFatals = new MemoryBoundedLogMessageBuffer(
 527           conf.getLong("hbase.master.buffer.for.rs.fatals", 1 * 1024 * 1024));
 528       LOG.info("hbase.rootdir={}, hbase.cluster.distributed={}", getDataRootDir(),
 529           this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false));
 530
 531       // Disable usage of meta replicas in the master
 532       this.conf.setBoolean(HConstants.USE_META_REPLICAS, false);
 533
 534       decorateMasterConfiguration(this.conf);
 535
 536       // Hack! Maps DFSClient => Master for logs.  HDFS made this
 537       // config param for task trackers, but we can piggyback off of it.
 538       if (this.conf.get("mapreduce.task.attempt.id") == null) {
 539         this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
 540       }
 541
 542       this.metricsMaster = new MetricsMaster(new MetricsMasterWrapperImpl(this));
 543
 544       // preload table descriptor at startup
 545       this.preLoadTableDescriptors = conf.getBoolean("hbase.master.preload.tabledescriptors", true);
 546
 547       this.maxBlancingTime = getMaxBalancingTime();
 548       this.maxRitPercent = conf.getDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT,
 549           HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT);
 550
 551       // Do we publish the status?
 552
 553       boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
 554           HConstants.STATUS_PUBLISHED_DEFAULT);
 555       Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
 556           conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
 557               ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
 558               ClusterStatusPublisher.Publisher.class);
 559
 560       if (shouldPublish) {
 561         if (publisherClass == null) {
 562           LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
 563               ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS +
 564               " is not set - not publishing status");
 565         } else {
 566           clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
 567           getChoreService().scheduleChore(clusterStatusPublisherChore);
 568         }
 569       }
 570
 571       // Some unit tests don't need a cluster, so no zookeeper at all
 572       if (!conf.getBoolean("hbase.testing.nocluster", false)) {
 573         this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this);
 574       } else {
 575         this.activeMasterManager = null;
 576       }
 577       cachedClusterId = new CachedClusterId(conf);
 578     } catch (Throwable t) {
 579       // Make sure we log the exception. HMaster is often started via reflection and the
 580       // cause of failed startup is lost.
 581       LOG.error("Failed construction of Master", t);
 582       throw t;
 583     }
 584   }
 585
 586   @Override
 587   protected String getUseThisHostnameInstead(Configuration conf) {
 588     return conf.get(MASTER_HOSTNAME_KEY);
 589   }
 590
 591   // Main run loop. Calls through to the regionserver run loop AFTER becoming active Master; will
 592   // block in here until then.
 593   @Override
 594   public void run() {
 595     try {
 596       if (!conf.getBoolean("hbase.testing.nocluster", false)) {
 597         Threads.setDaemonThreadRunning(new Thread(() -> {
 598           try {
 599             int infoPort = putUpJettyServer();
 600             startActiveMasterManager(infoPort);
 601           } catch (Throwable t) {
 602             // Make sure we log the exception.
 603             String error = "Failed to become Active Master";
 604             LOG.error(error, t);
 605             // Abort should have been called already.
 606             if (!isAborted()) {
 607               abort(error, t);
 608             }
 609           }
 610         }), getName() + ":becomeActiveMaster");
 611       }
 612       // Fall in here even if we have been aborted. Need to run the shutdown services and
 613       // the super run call will do this for us.
 614       super.run();
 615     } finally {
 616       if (this.clusterSchemaService != null) {
 617         // If on way out, then we are no longer active master.
 618         this.clusterSchemaService.stopAsync();
 619         try {
 620           this.clusterSchemaService.awaitTerminated(
 621               getConfiguration().getInt(HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS,
 622               DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS);
 623         } catch (TimeoutException te) {
 624           LOG.warn("Failed shutdown of clusterSchemaService", te);
 625         }
 626       }
 627       this.activeMaster = false;
 628     }
 629   }
 630
 631   // return the actual infoPort, -1 means disable info server.
 632   private int putUpJettyServer() throws IOException {
 633     if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
 634       return -1;
 635     }
 636     final int infoPort = conf.getInt("hbase.master.info.port.orig",
 637       HConstants.DEFAULT_MASTER_INFOPORT);
 638     // -1 is for disabling info server, so no redirecting
 639     if (infoPort < 0 || infoServer == null) {
 640       return -1;
 641     }
 642     if(infoPort == infoServer.getPort()) {
 643       return infoPort;
 644     }
 645     final String addr = conf.get("hbase.master.info.bindAddress", "0.0.0.0");
 646     if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
 647       String msg =
 648           "Failed to start redirecting jetty server. Address " + addr
 649               + " does not belong to this host. Correct configuration parameter: "
 650               + "hbase.master.info.bindAddress";
 651       LOG.error(msg);
 652       throw new IOException(msg);
 653     }
 654
 655     // TODO I'm pretty sure we could just add another binding to the InfoServer run by
 656     // the RegionServer and have it run the RedirectServlet instead of standing up
 657     // a second entire stack here.
 658     masterJettyServer = new Server();
 659     final ServerConnector connector = new ServerConnector(masterJettyServer);
 660     connector.setHost(addr);
 661     connector.setPort(infoPort);
 662     masterJettyServer.addConnector(connector);
 663     masterJettyServer.setStopAtShutdown(true);
 664
 665     final String redirectHostname =
 666         StringUtils.isBlank(useThisHostnameInstead) ? null : useThisHostnameInstead;
 667
 668     final RedirectServlet redirect = new RedirectServlet(infoServer, redirectHostname);
 669     final WebAppContext context = new WebAppContext(null, "/", null, null, null, null, WebAppContext.NO_SESSIONS);
 670     context.addServlet(new ServletHolder(redirect), "/*");
 671     context.setServer(masterJettyServer);
 672
 673     try {
 674       masterJettyServer.start();
 675     } catch (Exception e) {
 676       throw new IOException("Failed to start redirecting jetty server", e);
 677     }
 678     return connector.getLocalPort();
 679   }
 680
 681   @Override
 682   protected Function<TableDescriptorBuilder, TableDescriptorBuilder> getMetaTableObserver() {
 683     return builder -> builder.setRegionReplication(conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM));
 684   }
 685   /**
 686    * For compatibility, if failed with regionserver credentials, try the master one
 687    */
 688   @Override
 689   protected void login(UserProvider user, String host) throws IOException {
 690     try {
 691       super.login(user, host);
 692     } catch (IOException ie) {
 693       user.login(SecurityConstants.MASTER_KRB_KEYTAB_FILE,
 694               SecurityConstants.MASTER_KRB_PRINCIPAL, host);
 695     }
 696   }
 697
 698   /**
 699    * If configured to put regions on active master,
 700    * wait till a backup master becomes active.
 701    * Otherwise, loop till the server is stopped or aborted.
 702    */
 703   @Override
 704   protected void waitForMasterActive(){
 705     if (maintenanceMode) {
 706       return;
 707     }
 708     boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(conf);
 709     while (!(tablesOnMaster && activeMaster) && !isStopped() && !isAborted()) {
 710       sleeper.sleep();
 711     }
 712   }
 713
 714   @VisibleForTesting
 715   public MasterRpcServices getMasterRpcServices() {
 716     return (MasterRpcServices)rpcServices;
 717   }
 718
 719   public boolean balanceSwitch(final boolean b) throws IOException {
 720     return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
 721   }
 722
 723   @Override
 724   protected String getProcessName() {
 725     return MASTER;
 726   }
 727
 728   @Override
 729   protected boolean canCreateBaseZNode() {
 730     return true;
 731   }
 732
 733   @Override
 734   protected boolean canUpdateTableDescriptor() {
 735     return true;
 736   }
 737
 738   @Override
 739   protected RSRpcServices createRpcServices() throws IOException {
 740     return new MasterRpcServices(this);
 741   }
 742
 743   @Override
 744   protected void configureInfoServer() {
 745     infoServer.addUnprivilegedServlet("master-status", "/master-status", MasterStatusServlet.class);
 746     infoServer.setAttribute(MASTER, this);
 747     if (LoadBalancer.isTablesOnMaster(conf)) {
 748       super.configureInfoServer();
 749     }
 750   }
 751
 752   @Override
 753   protected Class<? extends HttpServlet> getDumpServlet() {
 754     return MasterDumpServlet.class;
 755   }
 756
 757   @Override
 758   public MetricsMaster getMasterMetrics() {
 759     return metricsMaster;
 760   }
 761
 762   /**
 763    * <p>
 764    * Initialize all ZK based system trackers. But do not include {@link RegionServerTracker}, it
 765    * should have already been initialized along with {@link ServerManager}.
 766    * </p>
 767    * <p>
 768    * Will be overridden in tests.
 769    * </p>
 770    */
 771   @VisibleForTesting
 772   protected void initializeZKBasedSystemTrackers()
 773       throws IOException, InterruptedException, KeeperException, ReplicationException {
 774     this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
 775     this.normalizer = RegionNormalizerFactory.getRegionNormalizer(conf);
 776     this.normalizer.setMasterServices(this);
 777     this.normalizer.setMasterRpcServices((MasterRpcServices)rpcServices);
 778     this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
 779     this.loadBalancerTracker.start();
 780
 781     this.regionNormalizerTracker = new RegionNormalizerTracker(zooKeeper, this);
 782     this.regionNormalizerTracker.start();
 783
 784     this.splitOrMergeTracker = new SplitOrMergeTracker(zooKeeper, conf, this);
 785     this.splitOrMergeTracker.start();
 786
 787     this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf);
 788
 789     this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager);
 790     this.drainingServerTracker.start();
 791
 792     this.snapshotCleanupTracker = new SnapshotCleanupTracker(zooKeeper, this);
 793     this.snapshotCleanupTracker.start();
 794
 795     String clientQuorumServers = conf.get(HConstants.CLIENT_ZOOKEEPER_QUORUM);
 796     boolean clientZkObserverMode = conf.getBoolean(HConstants.CLIENT_ZOOKEEPER_OBSERVER_MODE,
 797       HConstants.DEFAULT_CLIENT_ZOOKEEPER_OBSERVER_MODE);
 798     if (clientQuorumServers != null && !clientZkObserverMode) {
 799       // we need to take care of the ZK information synchronization
 800       // if given client ZK are not observer nodes
 801       ZKWatcher clientZkWatcher = new ZKWatcher(conf,
 802           getProcessName() + ":" + rpcServices.getSocketAddress().getPort() + "-clientZK", this,
 803           false, true);
 804       this.metaLocationSyncer = new MetaLocationSyncer(zooKeeper, clientZkWatcher, this);
 805       this.metaLocationSyncer.start();
 806       this.masterAddressSyncer = new MasterAddressSyncer(zooKeeper, clientZkWatcher, this);
 807       this.masterAddressSyncer.start();
 808       // set cluster id is a one-go effort
 809       ZKClusterId.setClusterId(clientZkWatcher, fileSystemManager.getClusterId());
 810     }
 811
 812     // Set the cluster as up.  If new RSs, they'll be waiting on this before
 813     // going ahead with their startup.
 814     boolean wasUp = this.clusterStatusTracker.isClusterUp();
 815     if (!wasUp) this.clusterStatusTracker.setClusterUp();
 816
 817     LOG.info("Active/primary master=" + this.serverName +
 818         ", sessionid=0x" +
 819         Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
 820         ", setting cluster-up flag (Was=" + wasUp + ")");
 821
 822     // create/initialize the snapshot manager and other procedure managers
 823     this.snapshotManager = new SnapshotManager();
 824     this.mpmHost = new MasterProcedureManagerHost();
 825     this.mpmHost.register(this.snapshotManager);
 826     this.mpmHost.register(new MasterFlushTableProcedureManager());
 827     this.mpmHost.loadProcedures(conf);
 828     this.mpmHost.initialize(this, this.metricsMaster);
 829   }
 830
 831   // Will be overriden in test to inject customized AssignmentManager
 832   @VisibleForTesting
 833   protected AssignmentManager createAssignmentManager(MasterServices master) {
 834     return new AssignmentManager(master);
 835   }
 836
  /**
   * Finish initialization of HMaster after becoming the primary master.
   * <p/>
   * The startup order is a bit complicated but very important, do not change it unless you know
   * what you are doing.
   * <ol>
   * <li>Initialize file system based components - file system manager, wal manager, table
   * descriptors, etc</li>
   * <li>Publish cluster id</li>
   * <li>Here comes the most complicated part - initialize server manager, assignment manager and
   * region server tracker
   * <ol type='i'>
   * <li>Create server manager</li>
   * <li>Create procedure executor, load the procedures, but do not start workers. We will start it
   * later after we finish scheduling SCPs to avoid scheduling duplicated SCPs for the same
   * server</li>
   * <li>Create assignment manager and start it, load the meta region state, but do not load data
   * from meta region</li>
   * <li>Start region server tracker, construct the online servers set and find out dead servers and
   * schedule SCP for them. The online servers will be constructed by scanning zk, and we will also
   * scan the wal directory to find out possible live region servers, and the differences between
   * these two sets are the dead servers</li>
   * </ol>
   * </li>
   * <li>If this is a new deploy, schedule an InitMetaProcedure to initialize meta</li>
   * <li>Start necessary service threads - balancer, catalog janitor, executor services, and also the
   * procedure executor, etc. Notice that the balancer must be created first as assignment manager
   * may use it when assigning regions.</li>
   * <li>Wait for meta to be initialized if necessary, start table state manager.</li>
   * <li>Wait for enough region servers to check-in</li>
   * <li>Let assignment manager load data from meta and construct region states</li>
   * <li>Start all other things such as chore services, etc</li>
   * </ol>
   * <p/>
   * Notice that now we will not schedule a special procedure to make meta online(unless the first
   * time where meta has not been created yet), we will rely on SCP to bring meta online.
   * <p/>
   * The method returns early, without completing initialization, if the master is stopped while
   * waiting for region servers, or if waiting for meta or for the namespace table reports
   * failure. In maintenance mode it returns right after marking the master as initialized.
   * @param status monitored task whose status text is updated as initialization progresses
   */
  private void finishActiveMasterInitialization(MonitoredTask status) throws IOException,
          InterruptedException, KeeperException, ReplicationException {
    /*
     * We are active master now... go initialize components we need to run.
     */
    status.setStatus("Initializing Master file system");

    // Start of active-master initialization; used below to report how long it took.
    this.masterActiveTime = System.currentTimeMillis();
    // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.

    // always initialize the MemStoreLAB as we use a region to store procedure now.
    initializeMemStoreChunkCreator();
    this.fileSystemManager = new MasterFileSystem(conf);
    this.walManager = new MasterWalManager(this);

    // enable table descriptors cache
    this.tableDescriptors.setCacheOn();

    // warm-up HTDs cache on master initialization
    if (preLoadTableDescriptors) {
      status.setStatus("Pre-loading table descriptors");
      this.tableDescriptors.getAll();
    }

    // Publish cluster ID; set it in Master too. The superclass RegionServer does this later but
    // only after it has checked in with the Master. At least a few tests ask Master for clusterId
    // before it has called its run method and before RegionServer has done the reportForDuty.
    ClusterId clusterId = fileSystemManager.getClusterId();
    status.setStatus("Publishing Cluster ID " + clusterId + " in ZooKeeper");
    ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
    this.clusterId = clusterId.toString();

    // Precaution. Put in place the old hbck1 lock file to fence out old hbase1s running their
    // hbck1s against an hbase2 cluster; it could do damage. To skip this behavior, set
    // hbase.write.hbck1.lock.file to false.
    if (this.conf.getBoolean("hbase.write.hbck1.lock.file", true)) {
      HBaseFsck.checkAndMarkRunningHbck(this.conf,
          HBaseFsck.createLockRetryCounterFactory(this.conf).create());
    }

    status.setStatus("Initialize ServerManager and schedule SCP for crash servers");
    // The below two managers must be created before loading procedures, as they will be used during
    // loading.
    this.serverManager = createServerManager(this);
    this.syncReplicationReplayWALManager = new SyncReplicationReplayWALManager(this);
    // The SplitWALManager is only created when WAL splitting is NOT coordinated by ZK.
    if (!conf.getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
      DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)) {
      this.splitWALManager = new SplitWALManager(this);
    }
    createProcedureExecutor();
    // Group the active procedures by their class so the TRSPs and SCPs can be picked out below.
    Map<Class<?>, List<Procedure<MasterProcedureEnv>>> procsByType =
      procedureExecutor.getActiveProceduresNoCopy().stream()
        .collect(Collectors.groupingBy(p -> p.getClass()));

    // Create Assignment Manager
    this.assignmentManager = createAssignmentManager(this);
    this.assignmentManager.start();
    // TODO: TRSP can perform as the sub procedure for other procedures, so even if it is marked as
    // completed, it could still be in the procedure list. This is a bit strange but is another
    // story, need to verify the implementation for ProcedureExecutor and ProcedureStore.
    List<TransitRegionStateProcedure> ritList =
      procsByType.getOrDefault(TransitRegionStateProcedure.class, Collections.emptyList()).stream()
        .filter(p -> !p.isFinished()).map(p -> (TransitRegionStateProcedure) p)
        .collect(Collectors.toList());
    this.assignmentManager.setupRIT(ritList);

    // Start RegionServerTracker with listing of servers found with existing SCPs -- these should
    // be registered in the deadServers set -- and with the list of servernames out on the
    // filesystem that COULD BE 'alive' (we'll schedule SCPs for each and let SCP figure it out).
    // We also pass dirs that are already 'splitting'... so we can do some checks down in tracker.
    // TODO: Generate the splitting and live Set in one pass instead of two as we currently do.
    this.regionServerTracker = new RegionServerTracker(zooKeeper, this, this.serverManager);
    this.regionServerTracker.start(
      procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream()
        .map(p -> (ServerCrashProcedure) p).map(p -> p.getServerName()).collect(Collectors.toSet()),
      walManager.getLiveServersFromWALDir(), walManager.getSplittingServersFromWALDir());
    // This manager will be started AFTER hbase:meta is confirmed on line.
    // hbase.mirror.table.state.to.zookeeper is so hbase1 clients can connect. They read table
    // state from zookeeper while hbase2 reads it from hbase:meta. Disable if no hbase1 clients.
    this.tableStateManager =
      this.conf.getBoolean(MirroringTableStateManager.MIRROR_TABLE_STATE_TO_ZK_KEY, true)
        ?
        new MirroringTableStateManager(this):
        new TableStateManager(this);

    status.setStatus("Initializing ZK system trackers");
    initializeZKBasedSystemTrackers();
    status.setStatus("Loading last flushed sequence id of regions");
    // Best effort: a failure to load is only logged and initialization carries on.
    try {
      this.serverManager.loadLastFlushedSequenceIds();
    } catch (IOException e) {
      LOG.info("Failed to load last flushed sequence id of regions"
          + " from file system", e);
    }
    // Set ourselves as active Master now our claim has succeeded up in zk.
    this.activeMaster = true;

    // Start the Zombie master detector after setting master as active, see HBASE-21535
    Thread zombieDetector = new Thread(new InitializationMonitor(this),
        "ActiveMasterInitializationMonitor-" + System.currentTimeMillis());
    zombieDetector.setDaemon(true);
    zombieDetector.start();

    // This is for backwards compatibility
    // See HBASE-11393
    status.setStatus("Update TableCFs node in ZNode");
    ReplicationPeerConfigUpgrader tableCFsUpdater =
        new ReplicationPeerConfigUpgrader(zooKeeper, conf);
    tableCFsUpdater.copyTableCFs();

    // In maintenance mode no coprocessor host is created, so cpHost stays as it was and the
    // cpHost null checks further down skip the coprocessor hooks.
    if (!maintenanceMode) {
      // Add the Observer to delete quotas on table deletion before starting all CPs by
      // default with quota support, avoiding if user specifically asks to not load this Observer.
      if (QuotaUtil.isQuotaEnabled(conf)) {
        updateConfigurationForQuotasObserver(conf);
      }
      // initialize master side coprocessors before we start handling requests
      status.setStatus("Initializing master coprocessors");
      this.cpHost = new MasterCoprocessorHost(this, this.conf);
    }

    // Checking if meta needs initializing.
    status.setStatus("Initializing meta table if this is a new deploy");
    InitMetaProcedure initMetaProc = null;
    // Print out state of hbase:meta on startup; helps debugging.
    RegionState rs = this.assignmentManager.getRegionStates().
        getRegionState(RegionInfoBuilder.FIRST_META_REGIONINFO);
    LOG.info("hbase:meta {}", rs);
    if (rs != null && rs.isOffline()) {
      // Reuse an InitMetaProcedure already known to the executor, if any, rather than
      // submitting a second one.
      Optional<InitMetaProcedure> optProc = procedureExecutor.getProcedures().stream()
        .filter(p -> p instanceof InitMetaProcedure).map(o -> (InitMetaProcedure) o).findAny();
      initMetaProc = optProc.orElseGet(() -> {
        // schedule an init meta procedure if meta has not been deployed yet
        InitMetaProcedure temp = new InitMetaProcedure();
        procedureExecutor.submitProcedure(temp);
        return temp;
      });
    }
    if (this.balancer instanceof FavoredNodesPromoter) {
      favoredNodesManager = new FavoredNodesManager(this);
    }

    // initialize load balancer
    this.balancer.setMasterServices(this);
    this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());
    this.balancer.initialize();

    // start up all service threads.
    status.setStatus("Initializing master service threads");
    startServiceThreads();
    // wait meta to be initialized after we start procedure executor
    if (initMetaProc != null) {
      initMetaProc.await();
    }
    // Wake up this server to check in
    sleeper.skipSleepCycle();

    // Wait for region servers to report in.
    // With this as part of master initialization, it precludes our being able to start a single
    // server that is both Master and RegionServer. Needs more thought. TODO.
    String statusStr = "Wait for region servers to report in";
    status.setStatus(statusStr);
    LOG.info(Objects.toString(status));
    waitForRegionServers(status);

    // Check if master is shutting down because issue initializing regionservers or balancer.
    if (isStopped()) {
      return;
    }

    status.setStatus("Starting assignment manager");
    // FIRST HBASE:META READ!!!!
    // The below cannot make progress w/o hbase:meta being online.
    // This is the FIRST attempt at going to hbase:meta. Meta on-lining is going on in background
    // as procedures run -- in particular SCPs for crashed servers... One should put up hbase:meta
    // if it is down. It may take a while to come online. So, wait here until meta is for sure
    // available. That's what waitForMetaOnline does.
    if (!waitForMetaOnline()) {
      return;
    }
    this.assignmentManager.joinCluster();
    // The below depends on hbase:meta being online.
    this.tableStateManager.start();
    // Below has to happen after tablestatemanager has started in the case where this hbase-2.x
    // is being started over an hbase-1.x dataset. tablestatemanager runs a migration as part
    // of its 'start' moving table state from zookeeper to hbase:meta. This migration needs to
    // complete before we do this next step processing offline regions else it fails reading
    // table states messing up master launch (namespace table, etc., are not assigned).
    this.assignmentManager.processOfflineRegions();
    // Initialize after meta is up as below scans meta
    if (favoredNodesManager != null && !maintenanceMode) {
      SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment =
          new SnapshotOfRegionAssignmentFromMeta(getConnection());
      snapshotOfRegionAssignment.initialize();
      favoredNodesManager.initialize(snapshotOfRegionAssignment);
    }

    // set cluster status again after user regions are assigned
    this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());

    // Start balancer and meta catalog janitor after meta and regions have been assigned.
    status.setStatus("Starting balancer and catalog janitor");
    this.clusterStatusChore = new ClusterStatusChore(this, balancer);
    getChoreService().scheduleChore(clusterStatusChore);
    this.balancerChore = new BalancerChore(this);
    getChoreService().scheduleChore(balancerChore);
    this.normalizerChore = new RegionNormalizerChore(this);
    getChoreService().scheduleChore(normalizerChore);
    this.catalogJanitorChore = new CatalogJanitor(this);
    getChoreService().scheduleChore(catalogJanitorChore);
    this.hbckChore = new HbckChore(this);
    getChoreService().scheduleChore(hbckChore);
    this.serverManager.startChore();

    // Only for rolling upgrade, where we need to migrate the data in namespace table to meta table.
    if (!waitForNamespaceOnline()) {
      return;
    }
    status.setStatus("Starting cluster schema service");
    initClusterSchemaService();

    if (this.cpHost != null) {
      // A failing hook is only logged; it does not stop master initialization.
      try {
        this.cpHost.preMasterInitialization();
      } catch (IOException e) {
        LOG.error("Coprocessor preMasterInitialization() hook failed", e);
      }
    }

    status.markComplete("Initialization successful");
    LOG.info(String.format("Master has completed initialization %.3fsec",
       (System.currentTimeMillis() - masterActiveTime) / 1000.0f));
    this.masterFinishedInitializationTime = System.currentTimeMillis();
    configurationManager.registerObserver(this.balancer);
    configurationManager.registerObserver(this.cleanerPool);
    configurationManager.registerObserver(this.hfileCleaner);
    configurationManager.registerObserver(this.logCleaner);
    configurationManager.registerObserver(this.regionsRecoveryConfigManager);
    // Set master as 'initialized'.
    setInitialized(true);

    // NOTE(review): this early return leaves zombieDetector un-interrupted; presumably harmless
    // because the master is already flagged as initialized -- confirm against
    // InitializationMonitor.
    if (maintenanceMode) {
      LOG.info("Detected repair mode, skipping final initialization steps.");
      return;
    }

    assignmentManager.checkIfShouldMoveSystemRegionAsync();
    status.setStatus("Assign meta replicas");
    MasterMetaBootstrap metaBootstrap = createMetaBootstrap();
    metaBootstrap.assignMetaReplicas();
    status.setStatus("Starting quota manager");
    initQuotaManager();
    if (QuotaUtil.isQuotaEnabled(conf)) {
      // Create the quota snapshot notifier
      spaceQuotaSnapshotNotifier = createQuotaSnapshotNotifier();
      spaceQuotaSnapshotNotifier.initialize(getConnection());
      this.quotaObserverChore = new QuotaObserverChore(this, getMasterMetrics());
      // Start the chore to read the region FS space reports and act on them
      getChoreService().scheduleChore(quotaObserverChore);

      this.snapshotQuotaChore = new SnapshotQuotaObserverChore(this, getMasterMetrics());
      // Start the chore to read snapshots and add their usage to table/NS quotas
      getChoreService().scheduleChore(snapshotQuotaChore);
    }

    // clear the dead servers with same host name and port of online server because we are not
    // removing dead server with same hostname and port of rs which is trying to check in before
    // master initialization. See HBASE-5916.
    this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();

    // Check and set the znode ACLs if needed in case we are overtaking a non-secure configuration
    status.setStatus("Checking ZNode ACLs");
    zooKeeper.checkAndSetZNodeAcls();

    status.setStatus("Initializing MOB Cleaner");
    initMobCleaner();

    status.setStatus("Calling postStartMaster coprocessors");
    if (this.cpHost != null) {
      // don't let cp initialization errors kill the master
      try {
        this.cpHost.postStartMaster();
      } catch (IOException ioe) {
        LOG.error("Coprocessor postStartMaster() hook failed", ioe);
      }
    }

    // Initialization is done; interrupt the initialization monitor thread started above.
    zombieDetector.interrupt();

    /*
     * After master has started up, lets do balancer post startup initialization. Since this runs
     * in activeMasterManager thread, it should be fine.
     */
    long start = System.currentTimeMillis();
    this.balancer.postMasterStartupInitialize();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Balancer post startup initialization complete, took " + (
          (System.currentTimeMillis() - start) / 1000) + " seconds");
    }
  }
1174
1175   /**
1176    * Check hbase:meta is up and ready for reading. For use during Master startup only.
1177    * @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
1178    *   and we will hold here until operator intervention.
1179    */
1180   @VisibleForTesting
1181   public boolean waitForMetaOnline() throws InterruptedException {
1182     return isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO);
1183   }
1184
1185   /**
1186    * @return True if region is online and scannable else false if an error or shutdown (Otherwise
1187    *   we just block in here holding up all forward-progess).
1188    */
1189   private boolean isRegionOnline(RegionInfo ri) throws InterruptedException {
1190     RetryCounter rc = null;
1191     while (!isStopped()) {
1192       RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri);
1193       if (rs.isOpened()) {
1194         if (this.getServerManager().isServerOnline(rs.getServerName())) {
1195           return true;
1196         }
1197       }
1198       // Region is not OPEN.
1199       Optional<Procedure<MasterProcedureEnv>> optProc = this.procedureExecutor.getProcedures().
1200           stream().filter(p -> p instanceof ServerCrashProcedure).findAny();
1201       // TODO: Add a page to refguide on how to do repair. Have this log message point to it.
1202       // Page will talk about loss of edits, how to schedule at least the meta WAL recovery, and
1203       // then how to assign including how to break region lock if one held.
1204       LOG.warn("{} is NOT online; state={}; ServerCrashProcedures={}. Master startup cannot " +
1205           "progress, in holding-pattern until region onlined.",
1206           ri.getRegionNameAsString(), rs, optProc.isPresent());
1207       // Check once-a-minute.
1208       if (rc == null) {
1209         rc = new RetryCounterFactory(1000).create();
1210       }
1211       Threads.sleep(rc.getBackoffTimeAndIncrementAttempts());
1212     }
1213     return false;
1214   }
1215
1216   /**
1217    * Check hbase:namespace table is assigned. If not, startup will hang looking for the ns table
1218    * <p/>
1219    * This is for rolling upgrading, later we will migrate the data in ns table to the ns family of
1220    * meta table. And if this is a new clsuter, this method will return immediately as there will be
1221    * no namespace table/region.
1222    * @return True if namespace table is up/online.
1223    */
1224   private boolean waitForNamespaceOnline() throws InterruptedException, IOException {
1225     TableState nsTableState =
1226       MetaTableAccessor.getTableState(getConnection(), TableName.NAMESPACE_TABLE_NAME);
1227     if (nsTableState == null || nsTableState.isDisabled()) {
1228       // this means we have already migrated the data and disabled or deleted the namespace table,
1229       // or this is a new depliy which does not have a namespace table from the beginning.
1230       return true;
1231     }
1232     List<RegionInfo> ris =
1233       this.assignmentManager.getRegionStates().getRegionsOfTable(TableName.NAMESPACE_TABLE_NAME);
1234     if (ris.isEmpty()) {
1235       // maybe this will not happen any more, but anyway, no harm to add a check here...
1236       return true;
1237     }
1238     // Else there are namespace regions up in meta. Ensure they are assigned before we go on.
1239     for (RegionInfo ri : ris) {
1240       isRegionOnline(ri);
1241     }
1242     return true;
1243   }
1244
1245   /**
1246    * Adds the {@code MasterQuotasObserver} to the list of configured Master observers to
1247    * automatically remove quotas for a table when that table is deleted.
1248    */
1249   @VisibleForTesting
1250   public void updateConfigurationForQuotasObserver(Configuration conf) {
1251     // We're configured to not delete quotas on table deletion, so we don't need to add the obs.
1252     if (!conf.getBoolean(
1253           MasterQuotasObserver.REMOVE_QUOTA_ON_TABLE_DELETE,
1254           MasterQuotasObserver.REMOVE_QUOTA_ON_TABLE_DELETE_DEFAULT)) {
1255       return;
1256     }
1257     String[] masterCoprocs = conf.getStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
1258     final int length = null == masterCoprocs ? 0 : masterCoprocs.length;
1259     String[] updatedCoprocs = new String[length + 1];
1260     if (length > 0) {
1261       System.arraycopy(masterCoprocs, 0, updatedCoprocs, 0, masterCoprocs.length);
1262     }
1263     updatedCoprocs[length] = MasterQuotasObserver.class.getName();
1264     conf.setStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, updatedCoprocs);
1265   }
1266
1267   private void initMobCleaner() {
1268     this.expiredMobFileCleanerChore = new ExpiredMobFileCleanerChore(this);
1269     getChoreService().scheduleChore(expiredMobFileCleanerChore);
1270
1271     int mobCompactionPeriod = conf.getInt(MobConstants.MOB_COMPACTION_CHORE_PERIOD,
1272         MobConstants.DEFAULT_MOB_COMPACTION_CHORE_PERIOD);
1273     this.mobCompactChore = new MobCompactionChore(this, mobCompactionPeriod);
1274     getChoreService().scheduleChore(mobCompactChore);
1275     this.mobCompactThread = new MasterMobCompactionThread(this);
1276   }
1277
1278   /**
1279    * <p>
1280    * Create a {@link MasterMetaBootstrap} instance.
1281    * </p>
1282    * <p>
1283    * Will be overridden in tests.
1284    * </p>
1285    */
1286   @VisibleForTesting
1287   protected MasterMetaBootstrap createMetaBootstrap() {
1288     // We put this out here in a method so can do a Mockito.spy and stub it out
1289     // w/ a mocked up MasterMetaBootstrap.
1290     return new MasterMetaBootstrap(this);
1291   }
1292
1293   /**
1294    * <p>
1295    * Create a {@link ServerManager} instance.
1296    * </p>
1297    * <p>
1298    * Will be overridden in tests.
1299    * </p>
1300    */
1301   @VisibleForTesting
1302   protected ServerManager createServerManager(final MasterServices master) throws IOException {
1303     // We put this out here in a method so can do a Mockito.spy and stub it out
1304     // w/ a mocked up ServerManager.
1305     setupClusterConnection();
1306     return new ServerManager(master);
1307   }
1308
1309   private void waitForRegionServers(final MonitoredTask status)
1310       throws IOException, InterruptedException {
1311     this.serverManager.waitForRegionServers(status);
1312   }
1313
1314   // Will be overridden in tests
1315   @VisibleForTesting
1316   protected void initClusterSchemaService() throws IOException, InterruptedException {
1317     this.clusterSchemaService = new ClusterSchemaServiceImpl(this);
1318     this.clusterSchemaService.startAsync();
1319     try {
1320       this.clusterSchemaService.awaitRunning(getConfiguration().getInt(
1321         HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS,
1322         DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS);
1323     } catch (TimeoutException toe) {
1324       throw new IOException("Timedout starting ClusterSchemaService", toe);
1325     }
1326   }
1327
1328   private void initQuotaManager() throws IOException {
1329     MasterQuotaManager quotaManager = new MasterQuotaManager(this);
1330     quotaManager.start();
1331     this.quotaManager = quotaManager;
1332   }
1333
1334   private SpaceQuotaSnapshotNotifier createQuotaSnapshotNotifier() {
1335     SpaceQuotaSnapshotNotifier notifier =
1336         SpaceQuotaSnapshotNotifierFactory.getInstance().create(getConfiguration());
1337     return notifier;
1338   }
1339
1340   boolean isCatalogJanitorEnabled() {
1341     return catalogJanitorChore != null ?
1342       catalogJanitorChore.getEnabled() : false;
1343   }
1344
1345   boolean isCleanerChoreEnabled() {
1346     boolean hfileCleanerFlag = true, logCleanerFlag = true;
1347
1348     if (hfileCleaner != null) {
1349       hfileCleanerFlag = hfileCleaner.getEnabled();
1350     }
1351
1352     if (logCleaner != null) {
1353       logCleanerFlag = logCleaner.getEnabled();
1354     }
1355
1356     return (hfileCleanerFlag && logCleanerFlag);
1357   }
1358
1359   @Override
1360   public ServerManager getServerManager() {
1361     return this.serverManager;
1362   }
1363
1364   @Override
1365   public MasterFileSystem getMasterFileSystem() {
1366     return this.fileSystemManager;
1367   }
1368
1369   @Override
1370   public MasterWalManager getMasterWalManager() {
1371     return this.walManager;
1372   }
1373
1374   @Override
1375   public SplitWALManager getSplitWALManager() {
1376     return splitWALManager;
1377   }
1378
1379   @Override
1380   public TableStateManager getTableStateManager() {
1381     return tableStateManager;
1382   }
1383
1384   /*
1385    * Start up all services. If any of these threads gets an unhandled exception
1386    * then they just die with a logged message.  This should be fine because
1387    * in general, we do not expect the master to get such unhandled exceptions
1388    *  as OOMEs; it should be lightly loaded. See what HRegionServer does if
1389    *  need to install an unexpected exception handler.
1390    */
1391   private void startServiceThreads() throws IOException {
1392     // Start the executor service pools
1393     this.executorService.startExecutorService(ExecutorType.MASTER_OPEN_REGION, conf.getInt(
1394       HConstants.MASTER_OPEN_REGION_THREADS, HConstants.MASTER_OPEN_REGION_THREADS_DEFAULT));
1395     this.executorService.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, conf.getInt(
1396       HConstants.MASTER_CLOSE_REGION_THREADS, HConstants.MASTER_CLOSE_REGION_THREADS_DEFAULT));
1397     this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
1398       conf.getInt(HConstants.MASTER_SERVER_OPERATIONS_THREADS,
1399         HConstants.MASTER_SERVER_OPERATIONS_THREADS_DEFAULT));
1400     this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
1401       conf.getInt(HConstants.MASTER_META_SERVER_OPERATIONS_THREADS,
1402         HConstants.MASTER_META_SERVER_OPERATIONS_THREADS_DEFAULT));
1403     this.executorService.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS, conf.getInt(
1404       HConstants.MASTER_LOG_REPLAY_OPS_THREADS, HConstants.MASTER_LOG_REPLAY_OPS_THREADS_DEFAULT));
1405     this.executorService.startExecutorService(ExecutorType.MASTER_SNAPSHOT_OPERATIONS, conf.getInt(
1406       SnapshotManager.SNAPSHOT_POOL_THREADS_KEY, SnapshotManager.SNAPSHOT_POOL_THREADS_DEFAULT));
1407
1408     // We depend on there being only one instance of this executor running
1409     // at a time. To do concurrency, would need fencing of enable/disable of
1410     // tables.
1411     // Any time changing this maxThreads to > 1, pls see the comment at
1412     // AccessController#postCompletedCreateTableAction
1413     this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
1414     startProcedureExecutor();
1415
1416     // Start log cleaner thread
1417     int cleanerInterval =
1418       conf.getInt(HBASE_MASTER_CLEANER_INTERVAL, DEFAULT_HBASE_MASTER_CLEANER_INTERVAL);
1419     this.logCleaner = new LogCleaner(cleanerInterval, this, conf,
1420       getMasterWalManager().getFileSystem(), getMasterWalManager().getOldLogDir(), cleanerPool);
1421     getChoreService().scheduleChore(logCleaner);
1422
1423     // start the hfile archive cleaner thread
1424     Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
1425     Map<String, Object> params = new HashMap<>();
1426     params.put(MASTER, this);
1427     this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf,
1428       getMasterFileSystem().getFileSystem(), archiveDir, cleanerPool, params);
1429     getChoreService().scheduleChore(hfileCleaner);
1430
1431     // Regions Reopen based on very high storeFileRefCount is considered enabled
1432     // only if hbase.regions.recovery.store.file.ref.count has value > 0
1433     final int maxStoreFileRefCount = conf.getInt(
1434       HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
1435       HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
1436     if (maxStoreFileRefCount > 0) {
1437       this.regionsRecoveryChore = new RegionsRecoveryChore(this, conf, this);
1438       getChoreService().scheduleChore(this.regionsRecoveryChore);
1439     } else {
1440       LOG.info("Reopening regions with very high storeFileRefCount is disabled. " +
1441           "Provide threshold value > 0 for {} to enable it.",
1442         HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
1443     }
1444
1445     this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this);
1446
1447     replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
1448       replicationPeerManager);
1449     getChoreService().scheduleChore(replicationBarrierCleaner);
1450
1451     final boolean isSnapshotChoreEnabled = this.snapshotCleanupTracker
1452         .isSnapshotCleanupEnabled();
1453     this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager());
1454     if (isSnapshotChoreEnabled) {
1455       getChoreService().scheduleChore(this.snapshotCleanerChore);
1456     } else {
1457       if (LOG.isTraceEnabled()) {
1458         LOG.trace("Snapshot Cleaner Chore is disabled. Not starting up the chore..");
1459       }
1460     }
1461     serviceStarted = true;
1462     if (LOG.isTraceEnabled()) {
1463       LOG.trace("Started service threads");
1464     }
1465   }
1466
1467   @Override
1468   protected void stopServiceThreads() {
1469     if (masterJettyServer != null) {
1470       LOG.info("Stopping master jetty server");
1471       try {
1472         masterJettyServer.stop();
1473       } catch (Exception e) {
1474         LOG.error("Failed to stop master jetty server", e);
1475       }
1476     }
1477     stopChores();
1478     if (this.mobCompactThread != null) {
1479       this.mobCompactThread.close();
1480     }
1481     super.stopServiceThreads();
1482     if (cleanerPool != null) {
1483       cleanerPool.shutdownNow();
1484       cleanerPool = null;
1485     }
1486
1487     LOG.debug("Stopping service threads");
1488
1489     if (this.quotaManager != null) {
1490       this.quotaManager.stop();
1491     }
1492
1493     if (this.activeMasterManager != null) {
1494       this.activeMasterManager.stop();
1495     }
1496     if (this.serverManager != null) {
1497       this.serverManager.stop();
1498     }
1499     if (this.assignmentManager != null) {
1500       this.assignmentManager.stop();
1501     }
1502
1503     stopProcedureExecutor();
1504
1505     if (this.walManager != null) {
1506       this.walManager.stop();
1507     }
1508     if (this.fileSystemManager != null) {
1509       this.fileSystemManager.stop();
1510     }
1511     if (this.mpmHost != null) {
1512       this.mpmHost.stop("server shutting down.");
1513     }
1514     if (this.regionServerTracker != null) {
1515       this.regionServerTracker.stop();
1516     }
1517   }
1518
1519   private void createProcedureExecutor() throws IOException {
1520     MasterProcedureEnv procEnv = new MasterProcedureEnv(this);
1521     // Create cleaner thread pool
1522     cleanerPool = new DirScanPool(conf);
1523     procedureStore = new RegionProcedureStore(this, cleanerPool,
1524       new MasterProcedureEnv.FsUtilsLeaseRecovery(this));
1525     procedureStore.registerListener(new ProcedureStoreListener() {
1526
1527       @Override
1528       public void abortProcess() {
1529         abort("The Procedure Store lost the lease", null);
1530       }
1531     });
1532     MasterProcedureScheduler procedureScheduler = procEnv.getProcedureScheduler();
1533     procedureExecutor = new ProcedureExecutor<>(conf, procEnv, procedureStore, procedureScheduler);
1534     configurationManager.registerObserver(procEnv);
1535
1536     int cpus = Runtime.getRuntime().availableProcessors();
1537     final int numThreads = conf.getInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, Math.max(
1538       (cpus > 0 ? cpus / 4 : 0), MasterProcedureConstants.DEFAULT_MIN_MASTER_PROCEDURE_THREADS));
1539     final boolean abortOnCorruption =
1540       conf.getBoolean(MasterProcedureConstants.EXECUTOR_ABORT_ON_CORRUPTION,
1541         MasterProcedureConstants.DEFAULT_EXECUTOR_ABORT_ON_CORRUPTION);
1542     procedureStore.start(numThreads);
1543     // Just initialize it but do not start the workers, we will start the workers later by calling
1544     // startProcedureExecutor. See the javadoc for finishActiveMasterInitialization for more
1545     // details.
1546     procedureExecutor.init(numThreads, abortOnCorruption);
1547     procEnv.getRemoteDispatcher().start();
1548   }
1549
1550   private void startProcedureExecutor() throws IOException {
1551     procedureExecutor.startWorkers();
1552   }
1553
1554   /**
1555    * Turn on/off Snapshot Cleanup Chore
1556    *
1557    * @param on indicates whether Snapshot Cleanup Chore is to be run
1558    */
1559   void switchSnapshotCleanup(final boolean on, final boolean synchronous) {
1560     if (synchronous) {
1561       synchronized (this.snapshotCleanerChore) {
1562         switchSnapshotCleanup(on);
1563       }
1564     } else {
1565       switchSnapshotCleanup(on);
1566     }
1567   }
1568
1569   private void switchSnapshotCleanup(final boolean on) {
1570     try {
1571       snapshotCleanupTracker.setSnapshotCleanupEnabled(on);
1572       if (on) {
1573         if (!getChoreService().isChoreScheduled(this.snapshotCleanerChore)) {
1574           getChoreService().scheduleChore(this.snapshotCleanerChore);
1575         }
1576       } else {
1577         getChoreService().cancelChore(this.snapshotCleanerChore);
1578       }
1579     } catch (KeeperException e) {
1580       LOG.error("Error updating snapshot cleanup mode to {}", on, e);
1581     }
1582   }
1583
1584
1585   private void stopProcedureExecutor() {
1586     if (procedureExecutor != null) {
1587       configurationManager.deregisterObserver(procedureExecutor.getEnvironment());
1588       procedureExecutor.getEnvironment().getRemoteDispatcher().stop();
1589       procedureExecutor.stop();
1590       procedureExecutor.join();
1591       procedureExecutor = null;
1592     }
1593
1594     if (procedureStore != null) {
1595       procedureStore.stop(isAborted());
1596       procedureStore = null;
1597     }
1598   }
1599
1600   private void stopChores() {
1601     ChoreService choreService = getChoreService();
1602     if (choreService != null) {
1603       choreService.cancelChore(this.expiredMobFileCleanerChore);
1604       choreService.cancelChore(this.mobCompactChore);
1605       choreService.cancelChore(this.balancerChore);
1606       choreService.cancelChore(this.normalizerChore);
1607       choreService.cancelChore(this.clusterStatusChore);
1608       choreService.cancelChore(this.catalogJanitorChore);
1609       choreService.cancelChore(this.clusterStatusPublisherChore);
1610       choreService.cancelChore(this.snapshotQuotaChore);
1611       choreService.cancelChore(this.logCleaner);
1612       choreService.cancelChore(this.hfileCleaner);
1613       choreService.cancelChore(this.replicationBarrierCleaner);
1614       choreService.cancelChore(this.snapshotCleanerChore);
1615       choreService.cancelChore(this.hbckChore);
1616       choreService.cancelChore(this.regionsRecoveryChore);
1617     }
1618   }
1619
1620   /**
1621    * @return Get remote side's InetAddress
1622    */
1623   InetAddress getRemoteInetAddress(final int port,
1624       final long serverStartCode) throws UnknownHostException {
1625     // Do it out here in its own little method so can fake an address when
1626     // mocking up in tests.
1627     InetAddress ia = RpcServer.getRemoteIp();
1628
1629     // The call could be from the local regionserver,
1630     // in which case, there is no remote address.
1631     if (ia == null && serverStartCode == startcode) {
1632       InetSocketAddress isa = rpcServices.getSocketAddress();
1633       if (isa != null && isa.getPort() == port) {
1634         ia = isa.getAddress();
1635       }
1636     }
1637     return ia;
1638   }
1639
1640   /**
1641    * @return Maximum time we should run balancer for
1642    */
1643   private int getMaxBalancingTime() {
1644     // if max balancing time isn't set, defaulting it to period time
1645     int maxBalancingTime = getConfiguration().getInt(HConstants.HBASE_BALANCER_MAX_BALANCING,
1646       getConfiguration()
1647         .getInt(HConstants.HBASE_BALANCER_PERIOD, HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
1648     return maxBalancingTime;
1649   }
1650
1651   /**
1652    * @return Maximum number of regions in transition
1653    */
1654   private int getMaxRegionsInTransition() {
1655     int numRegions = this.assignmentManager.getRegionStates().getRegionAssignments().size();
1656     return Math.max((int) Math.floor(numRegions * this.maxRitPercent), 1);
1657   }
1658
1659   /**
1660    * It first sleep to the next balance plan start time. Meanwhile, throttling by the max
1661    * number regions in transition to protect availability.
1662    * @param nextBalanceStartTime The next balance plan start time
1663    * @param maxRegionsInTransition max number of regions in transition
1664    * @param cutoffTime when to exit balancer
1665    */
1666   private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransition,
1667       long cutoffTime) {
1668     boolean interrupted = false;
1669
1670     // Sleep to next balance plan start time
1671     // But if there are zero regions in transition, it can skip sleep to speed up.
1672     while (!interrupted && System.currentTimeMillis() < nextBalanceStartTime
1673         && this.assignmentManager.getRegionStates().hasRegionsInTransition()) {
1674       try {
1675         Thread.sleep(100);
1676       } catch (InterruptedException ie) {
1677         interrupted = true;
1678       }
1679     }
1680
1681     // Throttling by max number regions in transition
1682     while (!interrupted
1683         && maxRegionsInTransition > 0
1684         && this.assignmentManager.getRegionStates().getRegionsInTransitionCount()
1685         >= maxRegionsInTransition && System.currentTimeMillis() <= cutoffTime) {
1686       try {
1687         // sleep if the number of regions in transition exceeds the limit
1688         Thread.sleep(100);
1689       } catch (InterruptedException ie) {
1690         interrupted = true;
1691       }
1692     }
1693
1694     if (interrupted) Thread.currentThread().interrupt();
1695   }
1696
1697   public boolean balance() throws IOException {
1698     return balance(false);
1699   }
1700
1701   public boolean balance(boolean force) throws IOException {
1702     // if master not initialized, don't run balancer.
1703     if (!isInitialized()) {
1704       LOG.debug("Master has not been initialized, don't run balancer.");
1705       return false;
1706     }
1707
1708     if (isInMaintenanceMode()) {
1709       LOG.info("Master is in maintenanceMode mode, don't run balancer.");
1710       return false;
1711     }
1712
1713     synchronized (this.balancer) {
1714       // If balance not true, don't run balancer.
1715       if (!this.loadBalancerTracker.isBalancerOn()) return false;
1716         // Only allow one balance run at at time.
1717       if (this.assignmentManager.hasRegionsInTransition()) {
1718         List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
1719         // if hbase:meta region is in transition, result of assignment cannot be recorded
1720         // ignore the force flag in that case
1721         boolean metaInTransition = assignmentManager.isMetaRegionInTransition();
1722         String prefix = force && !metaInTransition ? "R" : "Not r";
1723         List<RegionStateNode> toPrint = regionsInTransition;
1724         int max = 5;
1725         boolean truncated = false;
1726         if (regionsInTransition.size() > max) {
1727           toPrint = regionsInTransition.subList(0, max);
1728           truncated = true;
1729         }
1730         LOG.info(prefix + "unning balancer because " + regionsInTransition.size() +
1731           " region(s) in transition: " + toPrint + (truncated? "(truncated list)": ""));
1732         if (!force || metaInTransition) return false;
1733       }
1734       if (this.serverManager.areDeadServersInProgress()) {
1735         LOG.info("Not running balancer because processing dead regionserver(s): " +
1736           this.serverManager.getDeadServers());
1737         return false;
1738       }
1739
1740       if (this.cpHost != null) {
1741         try {
1742           if (this.cpHost.preBalance()) {
1743             LOG.debug("Coprocessor bypassing balancer request");
1744             return false;
1745           }
1746         } catch (IOException ioe) {
1747           LOG.error("Error invoking master coprocessor preBalance()", ioe);
1748           return false;
1749         }
1750       }
1751
1752       boolean isByTable = getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false);
1753       Map<TableName, Map<ServerName, List<RegionInfo>>> assignments =
1754         this.assignmentManager.getRegionStates()
1755           .getAssignmentsForBalancer(tableStateManager, this.serverManager.getOnlineServersList(),
1756             isByTable);
1757       for (Map<ServerName, List<RegionInfo>> serverMap : assignments.values()) {
1758         serverMap.keySet().removeAll(this.serverManager.getDrainingServersList());
1759       }
1760
1761       //Give the balancer the current cluster state.
1762       this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());
1763       this.balancer.setClusterLoad(assignments);
1764
1765       List<RegionPlan> plans = new ArrayList<>();
1766       for (Entry<TableName, Map<ServerName, List<RegionInfo>>> e : assignments.entrySet()) {
1767         List<RegionPlan> partialPlans = this.balancer.balanceCluster(e.getKey(), e.getValue());
1768         if (partialPlans != null) {
1769           plans.addAll(partialPlans);
1770         }
1771       }
1772
1773       List<RegionPlan> sucRPs = executeRegionPlansWithThrottling(plans);
1774
1775       if (this.cpHost != null) {
1776         try {
1777           this.cpHost.postBalance(sucRPs);
1778         } catch (IOException ioe) {
1779           // balancing already succeeded so don't change the result
1780           LOG.error("Error invoking master coprocessor postBalance()", ioe);
1781         }
1782       }
1783     }
1784     // If LoadBalancer did not generate any plans, it means the cluster is already balanced.
1785     // Return true indicating a success.
1786     return true;
1787   }
1788
1789   public List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans) {
1790     List<RegionPlan> sucRPs = new ArrayList<>();
1791     int maxRegionsInTransition = getMaxRegionsInTransition();
1792     long balanceStartTime = System.currentTimeMillis();
1793     long cutoffTime = balanceStartTime + this.maxBlancingTime;
1794     int rpCount = 0;  // number of RegionPlans balanced so far
1795     if (plans != null && !plans.isEmpty()) {
1796       int balanceInterval = this.maxBlancingTime / plans.size();
1797       LOG.info("Balancer plans size is " + plans.size() + ", the balance interval is "
1798           + balanceInterval + " ms, and the max number regions in transition is "
1799           + maxRegionsInTransition);
1800
1801       for (RegionPlan plan: plans) {
1802         LOG.info("balance " + plan);
1803         //TODO: bulk assign
1804         try {
1805           this.assignmentManager.moveAsync(plan);
1806         } catch (HBaseIOException hioe) {
1807           //should ignore failed plans here, avoiding the whole balance plans be aborted
1808           //later calls of balance() can fetch up the failed and skipped plans
1809           LOG.warn("Failed balance plan {}, skipping...", plan, hioe);
1810         }
1811         //rpCount records balance plans processed, does not care if a plan succeeds
1812         rpCount++;
1813
1814         if (this.maxBlancingTime > 0) {
1815           balanceThrottling(balanceStartTime + rpCount * balanceInterval, maxRegionsInTransition,
1816             cutoffTime);
1817         }
1818
1819         // if performing next balance exceeds cutoff time, exit the loop
1820         if (this.maxBlancingTime > 0 && rpCount < plans.size()
1821           && System.currentTimeMillis() > cutoffTime) {
1822           // TODO: After balance, there should not be a cutoff time (keeping it as
1823           // a security net for now)
1824           LOG.debug("No more balancing till next balance run; maxBalanceTime="
1825               + this.maxBlancingTime);
1826           break;
1827         }
1828       }
1829     }
1830     return sucRPs;
1831   }
1832
1833   @Override
1834   @VisibleForTesting
1835   public RegionNormalizer getRegionNormalizer() {
1836     return this.normalizer;
1837   }
1838
1839   /**
1840    * Perform normalization of cluster (invoked by {@link RegionNormalizerChore}).
1841    *
1842    * @return true if normalization step was performed successfully, false otherwise
1843    *    (specifically, if HMaster hasn't been initialized properly or normalization
1844    *    is globally disabled)
1845    */
1846   public boolean normalizeRegions() throws IOException {
1847     if (!isInitialized()) {
1848       LOG.debug("Master has not been initialized, don't run region normalizer.");
1849       return false;
1850     }
1851     if (this.getServerManager().isClusterShutdown()) {
1852       LOG.info("Cluster is shutting down, don't run region normalizer.");
1853       return false;
1854     }
1855     if (isInMaintenanceMode()) {
1856       LOG.info("Master is in maintenance mode, don't run region normalizer.");
1857       return false;
1858     }
1859     if (!this.regionNormalizerTracker.isNormalizerOn()) {
1860       LOG.debug("Region normalization is disabled, don't run region normalizer.");
1861       return false;
1862     }
1863
1864     synchronized (this.normalizer) {
1865       // Don't run the normalizer concurrently
1866       List<TableName> allEnabledTables = new ArrayList<>(
1867         this.tableStateManager.getTablesInStates(TableState.State.ENABLED));
1868
1869       Collections.shuffle(allEnabledTables);
1870
1871       for (TableName table : allEnabledTables) {
1872         if (isInMaintenanceMode()) {
1873           LOG.debug("Master is in maintenance mode, stop running region normalizer.");
1874           return false;
1875         }
1876
1877         TableDescriptor tblDesc = getTableDescriptors().get(table);
1878         if (table.isSystemTable() || (tblDesc != null &&
1879             !tblDesc.isNormalizationEnabled())) {
1880           LOG.trace("Skipping normalization for {}, as it's either system"
1881               + " table or doesn't have auto normalization turned on", table);
1882           continue;
1883         }
1884         List<NormalizationPlan> plans = this.normalizer.computePlanForTable(table);
1885         if (plans != null) {
1886           for (NormalizationPlan plan : plans) {
1887             plan.execute(asyncClusterConnection.toConnection().getAdmin());
1888             if (plan.getType() == PlanType.SPLIT) {
1889               splitPlanCount++;
1890             } else if (plan.getType() == PlanType.MERGE) {
1891               mergePlanCount++;
1892             }
1893           }
1894         }
1895       }
1896     }
1897     // If Region did not generate any plans, it means the cluster is already balanced.
1898     // Return true indicating a success.
1899     return true;
1900   }
1901
1902   /**
1903    * @return Client info for use as prefix on an audit log string; who did an action
1904    */
1905   @Override
1906   public String getClientIdAuditPrefix() {
1907     return "Client=" + RpcServer.getRequestUserName().orElse(null)
1908         + "/" + RpcServer.getRemoteAddress().orElse(null);
1909   }
1910
1911   /**
1912    * Switch for the background CatalogJanitor thread.
1913    * Used for testing.  The thread will continue to run.  It will just be a noop
1914    * if disabled.
1915    * @param b If false, the catalog janitor won't do anything.
1916    */
1917   public void setCatalogJanitorEnabled(final boolean b) {
1918     this.catalogJanitorChore.setEnabled(b);
1919   }
1920
1921   @Override
1922   public long mergeRegions(
1923       final RegionInfo[] regionsToMerge,
1924       final boolean forcible,
1925       final long ng,
1926       final long nonce) throws IOException {
1927     checkInitialized();
1928
1929     if (!isSplitOrMergeEnabled(MasterSwitchType.MERGE)) {
1930       String regionsStr = Arrays.deepToString(regionsToMerge);
1931       LOG.warn("Merge switch is off! skip merge of " + regionsStr);
1932       throw new IOException("Merge of " + regionsStr + " failed because merge switch is off");
1933     }
1934
1935     final String mergeRegionsStr = Arrays.stream(regionsToMerge).
1936       map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", "));
1937     return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, ng, nonce) {
1938       @Override
1939       protected void run() throws IOException {
1940         getMaster().getMasterCoprocessorHost().preMergeRegions(regionsToMerge);
1941         String aid = getClientIdAuditPrefix();
1942         LOG.info("{} merge regions {}", aid, mergeRegionsStr);
1943         submitProcedure(new MergeTableRegionsProcedure(procedureExecutor.getEnvironment(),
1944             regionsToMerge, forcible));
1945         getMaster().getMasterCoprocessorHost().postMergeRegions(regionsToMerge);
1946       }
1947
1948       @Override
1949       protected String getDescription() {
1950         return "MergeTableProcedure";
1951       }
1952     });
1953   }
1954
1955   @Override
1956   public long splitRegion(final RegionInfo regionInfo, final byte[] splitRow,
1957       final long nonceGroup, final long nonce)
1958   throws IOException {
1959     checkInitialized();
1960
1961     if (!isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
1962       LOG.warn("Split switch is off! skip split of " + regionInfo);
1963       throw new IOException("Split region " + regionInfo.getRegionNameAsString() +
1964           " failed due to split switch off");
1965     }
1966
1967     return MasterProcedureUtil.submitProcedure(
1968         new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
1969       @Override
1970       protected void run() throws IOException {
1971         getMaster().getMasterCoprocessorHost().preSplitRegion(regionInfo.getTable(), splitRow);
1972         LOG.info(getClientIdAuditPrefix() + " split " + regionInfo.getRegionNameAsString());
1973
1974         // Execute the operation asynchronously
1975         submitProcedure(getAssignmentManager().createSplitProcedure(regionInfo, splitRow));
1976       }
1977
1978       @Override
1979       protected String getDescription() {
1980         return "SplitTableProcedure";
1981       }
1982     });
1983   }
1984
1985   private void warmUpRegion(ServerName server, RegionInfo region) {
1986     FutureUtils.addListener(asyncClusterConnection.getRegionServerAdmin(server)
1987       .warmupRegion(RequestConverter.buildWarmupRegionRequest(region)), (r, e) -> {
1988         if (e != null) {
1989           LOG.warn("Failed to warm up region {} on server {}", region, server, e);
1990         }
1991       });
1992   }
1993
1994   // Public so can be accessed by tests. Blocks until move is done.
1995   // Replace with an async implementation from which you can get
1996   // a success/failure result.
1997   @VisibleForTesting
1998   public void move(final byte[] encodedRegionName, byte[] destServerName) throws HBaseIOException {
1999     RegionState regionState = assignmentManager.getRegionStates().
2000       getRegionState(Bytes.toString(encodedRegionName));
2001
2002     RegionInfo hri;
2003     if (regionState != null) {
2004       hri = regionState.getRegion();
2005     } else {
2006       throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
2007     }
2008
2009     ServerName dest;
2010     List<ServerName> exclude = hri.getTable().isSystemTable() ? assignmentManager.getExcludedServersForSystemTable()
2011         : new ArrayList<>(1);
2012     if (destServerName != null && exclude.contains(ServerName.valueOf(Bytes.toString(destServerName)))) {
2013       LOG.info(
2014           Bytes.toString(encodedRegionName) + " can not move to " + Bytes.toString(destServerName)
2015               + " because the server is in exclude list");
2016       destServerName = null;
2017     }
2018     if (destServerName == null || destServerName.length == 0) {
2019       LOG.info("Passed destination servername is null/empty so " +
2020         "choosing a server at random");
2021       exclude.add(regionState.getServerName());
2022       final List<ServerName> destServers = this.serverManager.createDestinationServersList(exclude);
2023       dest = balancer.randomAssignment(hri, destServers);
2024       if (dest == null) {
2025         LOG.debug("Unable to determine a plan to assign " + hri);
2026         return;
2027       }
2028     } else {
2029       ServerName candidate = ServerName.valueOf(Bytes.toString(destServerName));
2030       dest = balancer.randomAssignment(hri, Lists.newArrayList(candidate));
2031       if (dest == null) {
2032         LOG.debug("Unable to determine a plan to assign " + hri);
2033         return;
2034       }
2035       // TODO: What is this? I don't get it.
2036       if (dest.equals(serverName) && balancer instanceof BaseLoadBalancer
2037           && !((BaseLoadBalancer)balancer).shouldBeOnMaster(hri)) {
2038         // To avoid unnecessary region moving later by balancer. Don't put user
2039         // regions on master.
2040         LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
2041           + " to avoid unnecessary region moving later by load balancer,"
2042           + " because it should not be on master");
2043         return;
2044       }
2045     }
2046
2047     if (dest.equals(regionState.getServerName())) {
2048       LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
2049         + " because region already assigned to the same server " + dest + ".");
2050       return;
2051     }
2052
2053     // Now we can do the move
2054     RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);
2055     assert rp.getDestination() != null: rp.toString() + " " + dest;
2056
2057     try {
2058       checkInitialized();
2059       if (this.cpHost != null) {
2060         this.cpHost.preMove(hri, rp.getSource(), rp.getDestination());
2061       }
2062
2063       TransitRegionStateProcedure proc =
2064         this.assignmentManager.createMoveRegionProcedure(rp.getRegionInfo(), rp.getDestination());
2065       // Warmup the region on the destination before initiating the move.
2066       // A region server could reject the close request because it either does not
2067       // have the specified region or the region is being split.
2068       warmUpRegion(rp.getDestination(), hri);
2069
2070       LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
2071       Future<byte[]> future = ProcedureSyncWait.submitProcedure(this.procedureExecutor, proc);
2072       try {
2073         // Is this going to work? Will we throw exception on error?
2074         // TODO: CompletableFuture rather than this stunted Future.
2075         future.get();
2076       } catch (InterruptedException | ExecutionException e) {
2077         throw new HBaseIOException(e);
2078       }
2079       if (this.cpHost != null) {
2080         this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
2081       }
2082     } catch (IOException ioe) {
2083       if (ioe instanceof HBaseIOException) {
2084         throw (HBaseIOException)ioe;
2085       }
2086       throw new HBaseIOException(ioe);
2087     }
2088   }
2089
2090   @Override
2091   public long createTable(final TableDescriptor tableDescriptor, final byte[][] splitKeys,
2092       final long nonceGroup, final long nonce) throws IOException {
2093     checkInitialized();
2094     TableDescriptor desc = getMasterCoprocessorHost().preCreateTableRegionsInfos(tableDescriptor);
2095     if (desc == null) {
2096       throw new IOException("Creation for " + tableDescriptor + " is canceled by CP");
2097     }
2098     String namespace = desc.getTableName().getNamespaceAsString();
2099     this.clusterSchemaService.getNamespace(namespace);
2100
2101     RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(desc, splitKeys);
2102     TableDescriptorChecker.sanityCheck(conf, desc);
2103
2104     return MasterProcedureUtil
2105       .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2106         @Override
2107         protected void run() throws IOException {
2108           getMaster().getMasterCoprocessorHost().preCreateTable(desc, newRegions);
2109
2110           LOG.info(getClientIdAuditPrefix() + " create " + desc);
2111
2112           // TODO: We can handle/merge duplicate requests, and differentiate the case of
2113           // TableExistsException by saying if the schema is the same or not.
2114           //
2115           // We need to wait for the procedure to potentially fail due to "prepare" sanity
2116           // checks. This will block only the beginning of the procedure. See HBASE-19953.
2117           ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2118           submitProcedure(
2119             new CreateTableProcedure(procedureExecutor.getEnvironment(), desc, newRegions, latch));
2120           latch.await();
2121
2122           getMaster().getMasterCoprocessorHost().postCreateTable(desc, newRegions);
2123         }
2124
2125         @Override
2126         protected String getDescription() {
2127           return "CreateTableProcedure";
2128         }
2129       });
2130   }
2131
2132   @Override
2133   public long createSystemTable(final TableDescriptor tableDescriptor) throws IOException {
2134     if (isStopped()) {
2135       throw new MasterNotRunningException();
2136     }
2137
2138     TableName tableName = tableDescriptor.getTableName();
2139     if (!(tableName.isSystemTable())) {
2140       throw new IllegalArgumentException(
2141         "Only system table creation can use this createSystemTable API");
2142     }
2143
2144     RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(tableDescriptor, null);
2145
2146     LOG.info(getClientIdAuditPrefix() + " create " + tableDescriptor);
2147
2148     // This special create table is called locally to master.  Therefore, no RPC means no need
2149     // to use nonce to detect duplicated RPC call.
2150     long procId = this.procedureExecutor.submitProcedure(
2151       new CreateTableProcedure(procedureExecutor.getEnvironment(), tableDescriptor, newRegions));
2152
2153     return procId;
2154   }
2155
2156   private void startActiveMasterManager(int infoPort) throws KeeperException {
2157     String backupZNode = ZNodePaths.joinZNode(
2158       zooKeeper.getZNodePaths().backupMasterAddressesZNode, serverName.toString());
2159     /*
2160     * Add a ZNode for ourselves in the backup master directory since we
2161     * may not become the active master. If so, we want the actual active
2162     * master to know we are backup masters, so that it won't assign
2163     * regions to us if so configured.
2164     *
2165     * If we become the active master later, ActiveMasterManager will delete
2166     * this node explicitly.  If we crash before then, ZooKeeper will delete
2167     * this node for us since it is ephemeral.
2168     */
2169     LOG.info("Adding backup master ZNode " + backupZNode);
2170     if (!MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName, infoPort)) {
2171       LOG.warn("Failed create of " + backupZNode + " by " + serverName);
2172     }
2173     this.activeMasterManager.setInfoPort(infoPort);
2174     int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT, HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
2175     // If we're a backup master, stall until a primary to write this address
2176     if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP, HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
2177       LOG.debug("HMaster started in backup mode. Stalling until master znode is written.");
2178       // This will only be a minute or so while the cluster starts up,
2179       // so don't worry about setting watches on the parent znode
2180       while (!activeMasterManager.hasActiveMaster()) {
2181         LOG.debug("Waiting for master address and cluster state znode to be written.");
2182         Threads.sleep(timeout);
2183       }
2184     }
2185     MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
2186     status.setDescription("Master startup");
2187     try {
2188       if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
2189         finishActiveMasterInitialization(status);
2190       }
2191     } catch (Throwable t) {
2192       status.setStatus("Failed to become active: " + t.getMessage());
2193       LOG.error(HBaseMarkers.FATAL, "Failed to become active master", t);
2194       // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
2195       if (t instanceof NoClassDefFoundError && t.getMessage().
2196           contains("org/apache/hadoop/hdfs/protocol/HdfsConstants$SafeModeAction")) {
2197         // improved error message for this special case
2198         abort("HBase is having a problem with its Hadoop jars.  You may need to recompile " +
2199           "HBase against Hadoop version " + org.apache.hadoop.util.VersionInfo.getVersion() +
2200           " or change your hadoop jars to start properly", t);
2201       } else {
2202         abort("Unhandled exception. Starting shutdown.", t);
2203       }
2204     } finally {
2205       status.cleanup();
2206     }
2207   }
2208
2209   private static boolean isCatalogTable(final TableName tableName) {
2210     return tableName.equals(TableName.META_TABLE_NAME);
2211   }
2212
2213   @Override
2214   public long deleteTable(
2215       final TableName tableName,
2216       final long nonceGroup,
2217       final long nonce) throws IOException {
2218     checkInitialized();
2219
2220     return MasterProcedureUtil.submitProcedure(
2221         new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2222       @Override
2223       protected void run() throws IOException {
2224         getMaster().getMasterCoprocessorHost().preDeleteTable(tableName);
2225
2226         LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
2227
2228         // TODO: We can handle/merge duplicate request
2229         //
2230         // We need to wait for the procedure to potentially fail due to "prepare" sanity
2231         // checks. This will block only the beginning of the procedure. See HBASE-19953.
2232         ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2233         submitProcedure(new DeleteTableProcedure(procedureExecutor.getEnvironment(),
2234             tableName, latch));
2235         latch.await();
2236
2237         getMaster().getMasterCoprocessorHost().postDeleteTable(tableName);
2238       }
2239
2240       @Override
2241       protected String getDescription() {
2242         return "DeleteTableProcedure";
2243       }
2244     });
2245   }
2246
2247   @Override
2248   public long truncateTable(
2249       final TableName tableName,
2250       final boolean preserveSplits,
2251       final long nonceGroup,
2252       final long nonce) throws IOException {
2253     checkInitialized();
2254
2255     return MasterProcedureUtil.submitProcedure(
2256         new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2257       @Override
2258       protected void run() throws IOException {
2259         getMaster().getMasterCoprocessorHost().preTruncateTable(tableName);
2260
2261         LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
2262         ProcedurePrepareLatch latch = ProcedurePrepareLatch.createLatch(2, 0);
2263         submitProcedure(new TruncateTableProcedure(procedureExecutor.getEnvironment(),
2264             tableName, preserveSplits, latch));
2265         latch.await();
2266
2267         getMaster().getMasterCoprocessorHost().postTruncateTable(tableName);
2268       }
2269
2270       @Override
2271       protected String getDescription() {
2272         return "TruncateTableProcedure";
2273       }
2274     });
2275   }
2276
2277   @Override
2278   public long addColumn(final TableName tableName, final ColumnFamilyDescriptor column,
2279       final long nonceGroup, final long nonce) throws IOException {
2280     checkInitialized();
2281     checkTableExists(tableName);
2282
2283     return modifyTable(tableName, new TableDescriptorGetter() {
2284
2285       @Override
2286       public TableDescriptor get() throws IOException {
2287         TableDescriptor old = getTableDescriptors().get(tableName);
2288         if (old.hasColumnFamily(column.getName())) {
2289           throw new InvalidFamilyOperationException("Column family '" + column.getNameAsString()
2290               + "' in table '" + tableName + "' already exists so cannot be added");
2291         }
2292
2293         return TableDescriptorBuilder.newBuilder(old).setColumnFamily(column).build();
2294       }
2295     }, nonceGroup, nonce, true);
2296   }
2297
2298   /**
2299    * Implement to return TableDescriptor after pre-checks
2300    */
2301   protected interface TableDescriptorGetter {
2302     TableDescriptor get() throws IOException;
2303   }
2304
2305   @Override
2306   public long modifyColumn(final TableName tableName, final ColumnFamilyDescriptor descriptor,
2307       final long nonceGroup, final long nonce) throws IOException {
2308     checkInitialized();
2309     checkTableExists(tableName);
2310     return modifyTable(tableName, new TableDescriptorGetter() {
2311
2312       @Override
2313       public TableDescriptor get() throws IOException {
2314         TableDescriptor old = getTableDescriptors().get(tableName);
2315         if (!old.hasColumnFamily(descriptor.getName())) {
2316           throw new InvalidFamilyOperationException("Family '" + descriptor.getNameAsString()
2317               + "' does not exist, so it cannot be modified");
2318         }
2319
2320         return TableDescriptorBuilder.newBuilder(old).modifyColumnFamily(descriptor).build();
2321       }
2322     }, nonceGroup, nonce, true);
2323   }
2324
2325   @Override
2326   public long deleteColumn(final TableName tableName, final byte[] columnName,
2327       final long nonceGroup, final long nonce) throws IOException {
2328     checkInitialized();
2329     checkTableExists(tableName);
2330
2331     return modifyTable(tableName, new TableDescriptorGetter() {
2332
2333       @Override
2334       public TableDescriptor get() throws IOException {
2335         TableDescriptor old = getTableDescriptors().get(tableName);
2336
2337         if (!old.hasColumnFamily(columnName)) {
2338           throw new InvalidFamilyOperationException("Family '" + Bytes.toString(columnName)
2339               + "' does not exist, so it cannot be deleted");
2340         }
2341         if (old.getColumnFamilyCount() == 1) {
2342           throw new InvalidFamilyOperationException("Family '" + Bytes.toString(columnName)
2343               + "' is the only column family in the table, so it cannot be deleted");
2344         }
2345         return TableDescriptorBuilder.newBuilder(old).removeColumnFamily(columnName).build();
2346       }
2347     }, nonceGroup, nonce, true);
2348   }
2349
2350   @Override
2351   public long enableTable(final TableName tableName, final long nonceGroup, final long nonce)
2352       throws IOException {
2353     checkInitialized();
2354
2355     return MasterProcedureUtil.submitProcedure(
2356         new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2357       @Override
2358       protected void run() throws IOException {
2359         getMaster().getMasterCoprocessorHost().preEnableTable(tableName);
2360
2361         // Normally, it would make sense for this authorization check to exist inside
2362         // AccessController, but because the authorization check is done based on internal state
2363         // (rather than explicit permissions) we'll do the check here instead of in the
2364         // coprocessor.
2365         MasterQuotaManager quotaManager = getMasterQuotaManager();
2366         if (quotaManager != null) {
2367           if (quotaManager.isQuotaInitialized()) {
2368               SpaceQuotaSnapshot currSnapshotOfTable =
2369                   QuotaTableUtil.getCurrentSnapshotFromQuotaTable(getConnection(), tableName);
2370               if (currSnapshotOfTable != null) {
2371                 SpaceQuotaStatus quotaStatus = currSnapshotOfTable.getQuotaStatus();
2372                 if (quotaStatus.isInViolation()
2373                     && SpaceViolationPolicy.DISABLE == quotaStatus.getPolicy().orElse(null)) {
2374                 throw new AccessDeniedException("Enabling the table '" + tableName
2375                     + "' is disallowed due to a violated space quota.");
2376               }
2377             }
2378           } else if (LOG.isTraceEnabled()) {
2379             LOG.trace("Unable to check for space quotas as the MasterQuotaManager is not enabled");
2380           }
2381         }
2382
2383         LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
2384
2385         // Execute the operation asynchronously - client will check the progress of the operation
2386         // In case the request is from a <1.1 client before returning,
2387         // we want to make sure that the table is prepared to be
2388         // enabled (the table is locked and the table state is set).
2389         // Note: if the procedure throws exception, we will catch it and rethrow.
2390         final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createLatch();
2391         submitProcedure(new EnableTableProcedure(procedureExecutor.getEnvironment(),
2392             tableName, prepareLatch));
2393         prepareLatch.await();
2394
2395         getMaster().getMasterCoprocessorHost().postEnableTable(tableName);
2396       }
2397
2398       @Override
2399       protected String getDescription() {
2400         return "EnableTableProcedure";
2401       }
2402     });
2403   }
2404
2405   @Override
2406   public long disableTable(final TableName tableName, final long nonceGroup, final long nonce)
2407       throws IOException {
2408     checkInitialized();
2409
2410     return MasterProcedureUtil.submitProcedure(
2411         new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2412       @Override
2413       protected void run() throws IOException {
2414         getMaster().getMasterCoprocessorHost().preDisableTable(tableName);
2415
2416         LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
2417
2418         // Execute the operation asynchronously - client will check the progress of the operation
2419         // In case the request is from a <1.1 client before returning,
2420         // we want to make sure that the table is prepared to be
2421         // enabled (the table is locked and the table state is set).
2422         // Note: if the procedure throws exception, we will catch it and rethrow.
2423         //
2424         // We need to wait for the procedure to potentially fail due to "prepare" sanity
2425         // checks. This will block only the beginning of the procedure. See HBASE-19953.
2426         final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createBlockingLatch();
2427         submitProcedure(new DisableTableProcedure(procedureExecutor.getEnvironment(),
2428             tableName, false, prepareLatch));
2429         prepareLatch.await();
2430
2431         getMaster().getMasterCoprocessorHost().postDisableTable(tableName);
2432       }
2433
2434       @Override
2435       protected String getDescription() {
2436         return "DisableTableProcedure";
2437       }
2438     });
2439   }
2440
2441   private long modifyTable(final TableName tableName,
2442       final TableDescriptorGetter newDescriptorGetter, final long nonceGroup, final long nonce,
2443       final boolean shouldCheckDescriptor) throws IOException {
2444     return MasterProcedureUtil
2445         .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2446           @Override
2447           protected void run() throws IOException {
2448             TableDescriptor oldDescriptor = getMaster().getTableDescriptors().get(tableName);
2449             TableDescriptor newDescriptor = getMaster().getMasterCoprocessorHost()
2450                 .preModifyTable(tableName, oldDescriptor, newDescriptorGetter.get());
2451             TableDescriptorChecker.sanityCheck(conf, newDescriptor);
2452             LOG.info("{} modify table {} from {} to {}", getClientIdAuditPrefix(), tableName,
2453                 oldDescriptor, newDescriptor);
2454
2455             // Execute the operation synchronously - wait for the operation completes before
2456             // continuing.
2457             //
2458             // We need to wait for the procedure to potentially fail due to "prepare" sanity
2459             // checks. This will block only the beginning of the procedure. See HBASE-19953.
2460             ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2461             submitProcedure(new ModifyTableProcedure(procedureExecutor.getEnvironment(),
2462                 newDescriptor, latch, oldDescriptor, shouldCheckDescriptor));
2463             latch.await();
2464
2465             getMaster().getMasterCoprocessorHost().postModifyTable(tableName, oldDescriptor,
2466               newDescriptor);
2467           }
2468
2469           @Override
2470           protected String getDescription() {
2471             return "ModifyTableProcedure";
2472           }
2473         });
2474
2475   }
2476
2477   @Override
2478   public long modifyTable(final TableName tableName, final TableDescriptor newDescriptor,
2479       final long nonceGroup, final long nonce) throws IOException {
2480     checkInitialized();
2481     return modifyTable(tableName, new TableDescriptorGetter() {
2482       @Override
2483       public TableDescriptor get() throws IOException {
2484         return newDescriptor;
2485       }
2486     }, nonceGroup, nonce, false);
2487
2488   }
2489
2490   public long restoreSnapshot(final SnapshotDescription snapshotDesc,
2491       final long nonceGroup, final long nonce, final boolean restoreAcl) throws IOException {
2492     checkInitialized();
2493     getSnapshotManager().checkSnapshotSupport();
2494
2495     // Ensure namespace exists. Will throw exception if non-known NS.
2496     final TableName dstTable = TableName.valueOf(snapshotDesc.getTable());
2497     getClusterSchema().getNamespace(dstTable.getNamespaceAsString());
2498
2499     return MasterProcedureUtil.submitProcedure(
2500         new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2501       @Override
2502       protected void run() throws IOException {
2503           setProcId(
2504             getSnapshotManager().restoreOrCloneSnapshot(snapshotDesc, getNonceKey(), restoreAcl));
2505       }
2506
2507       @Override
2508       protected String getDescription() {
2509         return "RestoreSnapshotProcedure";
2510       }
2511     });
2512   }
2513
2514   private void checkTableExists(final TableName tableName)
2515       throws IOException, TableNotFoundException {
2516     if (!MetaTableAccessor.tableExists(getConnection(), tableName)) {
2517       throw new TableNotFoundException(tableName);
2518     }
2519   }
2520
2521   @Override
2522   public void checkTableModifiable(final TableName tableName)
2523       throws IOException, TableNotFoundException, TableNotDisabledException {
2524     if (isCatalogTable(tableName)) {
2525       throw new IOException("Can't modify catalog tables");
2526     }
2527     checkTableExists(tableName);
2528     TableState ts = getTableStateManager().getTableState(tableName);
2529     if (!ts.isDisabled()) {
2530       throw new TableNotDisabledException("Not DISABLED; " + ts);
2531     }
2532   }
2533
2534   public ClusterMetrics getClusterMetricsWithoutCoprocessor() throws InterruptedIOException {
2535     return getClusterMetricsWithoutCoprocessor(EnumSet.allOf(Option.class));
2536   }
2537
2538   public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> options)
2539       throws InterruptedIOException {
2540     ClusterMetricsBuilder builder = ClusterMetricsBuilder.newBuilder();
2541     // given that hbase1 can't submit the request with Option,
2542     // we return all information to client if the list of Option is empty.
2543     if (options.isEmpty()) {
2544       options = EnumSet.allOf(Option.class);
2545     }
2546
2547     for (Option opt : options) {
2548       switch (opt) {
2549         case HBASE_VERSION: builder.setHBaseVersion(VersionInfo.getVersion()); break;
2550         case CLUSTER_ID: builder.setClusterId(getClusterId()); break;
2551         case MASTER: builder.setMasterName(getServerName()); break;
2552         case BACKUP_MASTERS: builder.setBackerMasterNames(getBackupMasters()); break;
2553         case LIVE_SERVERS: {
2554           if (serverManager != null) {
2555             builder.setLiveServerMetrics(serverManager.getOnlineServers().entrySet().stream()
2556               .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue())));
2557           }
2558           break;
2559         }
2560         case DEAD_SERVERS: {
2561           if (serverManager != null) {
2562             builder.setDeadServerNames(new ArrayList<>(
2563               serverManager.getDeadServers().copyServerNames()));
2564           }
2565           break;
2566         }
2567         case MASTER_COPROCESSORS: {
2568           if (cpHost != null) {
2569             builder.setMasterCoprocessorNames(Arrays.asList(getMasterCoprocessors()));
2570           }
2571           break;
2572         }
2573         case REGIONS_IN_TRANSITION: {
2574           if (assignmentManager != null) {
2575             builder.setRegionsInTransition(assignmentManager.getRegionStates()
2576                 .getRegionsStateInTransition());
2577           }
2578           break;
2579         }
2580         case BALANCER_ON: {
2581           if (loadBalancerTracker != null) {
2582             builder.setBalancerOn(loadBalancerTracker.isBalancerOn());
2583           }
2584           break;
2585         }
2586         case MASTER_INFO_PORT: {
2587           if (infoServer != null) {
2588             builder.setMasterInfoPort(infoServer.getPort());
2589           }
2590           break;
2591         }
2592         case SERVERS_NAME: {
2593           if (serverManager != null) {
2594             builder.setServerNames(serverManager.getOnlineServersList());
2595           }
2596           break;
2597         }
2598         case TABLE_TO_REGIONS_COUNT: {
2599           if (isActiveMaster() && isInitialized() && assignmentManager != null) {
2600             try {
2601               Map<TableName, RegionStatesCount> tableRegionStatesCountMap = new HashMap<>();
2602               Map<String, TableDescriptor> tableDescriptorMap = getTableDescriptors().getAll();
2603               for (TableDescriptor tableDescriptor : tableDescriptorMap.values()) {
2604                 TableName tableName = tableDescriptor.getTableName();
2605                 RegionStatesCount regionStatesCount = assignmentManager
2606                   .getRegionStatesCount(tableName);
2607                 tableRegionStatesCountMap.put(tableName, regionStatesCount);
2608               }
2609               builder.setTableRegionStatesCount(tableRegionStatesCountMap);
2610             } catch (IOException e) {
2611               LOG.error("Error while populating TABLE_TO_REGIONS_COUNT for Cluster Metrics..", e);
2612             }
2613           }
2614           break;
2615         }
2616       }
2617     }
2618     return builder.build();
2619   }
2620
2621   /**
2622    * @return cluster status
2623    */
2624   public ClusterMetrics getClusterMetrics() throws IOException {
2625     return getClusterMetrics(EnumSet.allOf(Option.class));
2626   }
2627
2628   public ClusterMetrics getClusterMetrics(EnumSet<Option> options) throws IOException {
2629     if (cpHost != null) {
2630       cpHost.preGetClusterMetrics();
2631     }
2632     ClusterMetrics status = getClusterMetricsWithoutCoprocessor(options);
2633     if (cpHost != null) {
2634       cpHost.postGetClusterMetrics(status);
2635     }
2636     return status;
2637   }
2638
2639   private List<ServerName> getBackupMasters() throws InterruptedIOException {
2640     // Build Set of backup masters from ZK nodes
2641     List<String> backupMasterStrings;
2642     try {
2643       backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
2644         this.zooKeeper.getZNodePaths().backupMasterAddressesZNode);
2645     } catch (KeeperException e) {
2646       LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
2647       backupMasterStrings = null;
2648     }
2649
2650     List<ServerName> backupMasters = Collections.emptyList();
2651     if (backupMasterStrings != null && !backupMasterStrings.isEmpty()) {
2652       backupMasters = new ArrayList<>(backupMasterStrings.size());
2653       for (String s: backupMasterStrings) {
2654         try {
2655           byte [] bytes;
2656           try {
2657             bytes = ZKUtil.getData(this.zooKeeper, ZNodePaths.joinZNode(
2658                 this.zooKeeper.getZNodePaths().backupMasterAddressesZNode, s));
2659           } catch (InterruptedException e) {
2660             throw new InterruptedIOException();
2661           }
2662           if (bytes != null) {
2663             ServerName sn;
2664             try {
2665               sn = ProtobufUtil.parseServerNameFrom(bytes);
2666             } catch (DeserializationException e) {
2667               LOG.warn("Failed parse, skipping registering backup server", e);
2668               continue;
2669             }
2670             backupMasters.add(sn);
2671           }
2672         } catch (KeeperException e) {
2673           LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
2674                    "backup servers"), e);
2675         }
2676       }
2677       Collections.sort(backupMasters, new Comparator<ServerName>() {
2678         @Override
2679         public int compare(ServerName s1, ServerName s2) {
2680           return s1.getServerName().compareTo(s2.getServerName());
2681         }});
2682     }
2683     return backupMasters;
2684   }
2685
2686   /**
2687    * The set of loaded coprocessors is stored in a static set. Since it's
2688    * statically allocated, it does not require that HMaster's cpHost be
2689    * initialized prior to accessing it.
2690    * @return a String representation of the set of names of the loaded coprocessors.
2691    */
2692   public static String getLoadedCoprocessors() {
2693     return CoprocessorHost.getLoadedCoprocessors().toString();
2694   }
2695
2696   /**
2697    * @return timestamp in millis when HMaster was started.
2698    */
2699   public long getMasterStartTime() {
2700     return startcode;
2701   }
2702
2703   /**
2704    * @return timestamp in millis when HMaster became the active master.
2705    */
2706   public long getMasterActiveTime() {
2707     return masterActiveTime;
2708   }
2709
2710   /**
2711    * @return timestamp in millis when HMaster finished becoming the active master
2712    */
2713   public long getMasterFinishedInitializationTime() {
2714     return masterFinishedInitializationTime;
2715   }
2716
2717   public int getNumWALFiles() {
2718     return 0;
2719   }
2720
2721   public ProcedureStore getProcedureStore() {
2722     return procedureStore;
2723   }
2724
2725   public int getRegionServerInfoPort(final ServerName sn) {
2726     int port = this.serverManager.getInfoPort(sn);
2727     return port == 0 ? conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
2728       HConstants.DEFAULT_REGIONSERVER_INFOPORT) : port;
2729   }
2730
2731   @Override
2732   public String getRegionServerVersion(ServerName sn) {
2733     // Will return "0.0.0" if the server is not online to prevent move system region to unknown
2734     // version RS.
2735     return this.serverManager.getVersion(sn);
2736   }
2737
2738   @Override
2739   public void checkIfShouldMoveSystemRegionAsync() {
2740     assignmentManager.checkIfShouldMoveSystemRegionAsync();
2741   }
2742
2743   /**
2744    * @return array of coprocessor SimpleNames.
2745    */
2746   public String[] getMasterCoprocessors() {
2747     Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
2748     return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
2749   }
2750
2751   @Override
2752   public void abort(String reason, Throwable cause) {
2753     if (isAborted() || isStopped()) {
2754       return;
2755     }
2756     setAbortRequested();
2757     if (cpHost != null) {
2758       // HBASE-4014: dump a list of loaded coprocessors.
2759       LOG.error(HBaseMarkers.FATAL, "Master server abort: loaded coprocessors are: " +
2760           getLoadedCoprocessors());
2761     }
2762     String msg = "***** ABORTING master " + this + ": " + reason + " *****";
2763     if (cause != null) {
2764       LOG.error(HBaseMarkers.FATAL, msg, cause);
2765     } else {
2766       LOG.error(HBaseMarkers.FATAL, msg);
2767     }
2768
2769     try {
2770       stopMaster();
2771     } catch (IOException e) {
2772       LOG.error("Exception occurred while stopping master", e);
2773     }
2774   }
2775
2776   @Override
2777   public ZKWatcher getZooKeeper() {
2778     return zooKeeper;
2779   }
2780
2781   @Override
2782   public MasterCoprocessorHost getMasterCoprocessorHost() {
2783     return cpHost;
2784   }
2785
2786   @Override
2787   public MasterQuotaManager getMasterQuotaManager() {
2788     return quotaManager;
2789   }
2790
2791   @Override
2792   public ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
2793     return procedureExecutor;
2794   }
2795
2796   @Override
2797   public ServerName getServerName() {
2798     return this.serverName;
2799   }
2800
2801   @Override
2802   public AssignmentManager getAssignmentManager() {
2803     return this.assignmentManager;
2804   }
2805
2806   @Override
2807   public CatalogJanitor getCatalogJanitor() {
2808     return this.catalogJanitorChore;
2809   }
2810
2811   public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
2812     return rsFatals;
2813   }
2814
2815   /**
2816    * Shutdown the cluster.
2817    * Master runs a coordinated stop of all RegionServers and then itself.
2818    */
2819   public void shutdown() throws IOException {
2820     if (cpHost != null) {
2821       cpHost.preShutdown();
2822     }
2823
2824     // Tell the servermanager cluster shutdown has been called. This makes it so when Master is
2825     // last running server, it'll stop itself. Next, we broadcast the cluster shutdown by setting
2826     // the cluster status as down. RegionServers will notice this change in state and will start
2827     // shutting themselves down. When last has exited, Master can go down.
2828     if (this.serverManager != null) {
2829       this.serverManager.shutdownCluster();
2830     }
2831     if (this.clusterStatusTracker != null) {
2832       try {
2833         this.clusterStatusTracker.setClusterDown();
2834       } catch (KeeperException e) {
2835         LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
2836       }
2837     }
2838     // Stop the procedure executor. Will stop any ongoing assign, unassign, server crash etc.,
2839     // processing so we can go down.
2840     if (this.procedureExecutor != null) {
2841       this.procedureExecutor.stop();
2842     }
2843     // Shutdown our cluster connection. This will kill any hosted RPCs that might be going on;
2844     // this is what we want especially if the Master is in startup phase doing call outs to
2845     // hbase:meta, etc. when cluster is down. Without ths connection close, we'd have to wait on
2846     // the rpc to timeout.
2847     if (this.asyncClusterConnection != null) {
2848       this.asyncClusterConnection.close();
2849     }
2850   }
2851
2852   public void stopMaster() throws IOException {
2853     if (cpHost != null) {
2854       cpHost.preStopMaster();
2855     }
2856     stop("Stopped by " + Thread.currentThread().getName());
2857   }
2858
2859   @Override
2860   public void stop(String msg) {
2861     if (!isStopped()) {
2862       super.stop(msg);
2863       if (this.activeMasterManager != null) {
2864         this.activeMasterManager.stop();
2865       }
2866     }
2867   }
2868
2869   @VisibleForTesting
2870   protected void checkServiceStarted() throws ServerNotRunningYetException {
2871     if (!serviceStarted) {
2872       throw new ServerNotRunningYetException("Server is not running yet");
2873     }
2874   }
2875
2876   public static class MasterStoppedException extends DoNotRetryIOException {
2877     MasterStoppedException() {
2878       super();
2879     }
2880   }
2881
2882   void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException,
2883       MasterNotRunningException, MasterStoppedException {
2884     checkServiceStarted();
2885     if (!isInitialized()) {
2886       throw new PleaseHoldException("Master is initializing");
2887     }
2888     if (isStopped()) {
2889       throw new MasterStoppedException();
2890     }
2891   }
2892
2893   /**
2894    * Report whether this master is currently the active master or not.
2895    * If not active master, we are parked on ZK waiting to become active.
2896    *
2897    * This method is used for testing.
2898    *
2899    * @return true if active master, false if not.
2900    */
2901   @Override
2902   public boolean isActiveMaster() {
2903     return activeMaster;
2904   }
2905
2906   /**
2907    * Report whether this master has completed with its initialization and is
2908    * ready.  If ready, the master is also the active master.  A standby master
2909    * is never ready.
2910    *
2911    * This method is used for testing.
2912    *
2913    * @return true if master is ready to go, false if not.
2914    */
2915   @Override
2916   public boolean isInitialized() {
2917     return initialized.isReady();
2918   }
2919
2920   /**
2921    * Report whether this master is in maintenance mode.
2922    *
2923    * @return true if master is in maintenanceMode
2924    */
2925   @Override
2926   public boolean isInMaintenanceMode() {
2927     return maintenanceMode;
2928   }
2929
2930   @VisibleForTesting
2931   public void setInitialized(boolean isInitialized) {
2932     procedureExecutor.getEnvironment().setEventReady(initialized, isInitialized);
2933   }
2934
2935   @Override
2936   public ProcedureEvent<?> getInitializedEvent() {
2937     return initialized;
2938   }
2939
2940   /**
2941    * Compute the average load across all region servers.
2942    * Currently, this uses a very naive computation - just uses the number of
2943    * regions being served, ignoring stats about number of requests.
2944    * @return the average load
2945    */
2946   public double getAverageLoad() {
2947     if (this.assignmentManager == null) {
2948       return 0;
2949     }
2950
2951     RegionStates regionStates = this.assignmentManager.getRegionStates();
2952     if (regionStates == null) {
2953       return 0;
2954     }
2955     return regionStates.getAverageLoad();
2956   }
2957
2958   /*
2959    * @return the count of region split plans executed
2960    */
2961   public long getSplitPlanCount() {
2962     return splitPlanCount;
2963   }
2964
2965   /*
2966    * @return the count of region merge plans executed
2967    */
2968   public long getMergePlanCount() {
2969     return mergePlanCount;
2970   }
2971
2972   @Override
2973   public boolean registerService(Service instance) {
2974     /*
2975      * No stacking of instances is allowed for a single service name
2976      */
2977     Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
2978     String serviceName = CoprocessorRpcUtils.getServiceName(serviceDesc);
2979     if (coprocessorServiceHandlers.containsKey(serviceName)) {
2980       LOG.error("Coprocessor service "+serviceName+
2981           " already registered, rejecting request from "+instance
2982       );
2983       return false;
2984     }
2985
2986     coprocessorServiceHandlers.put(serviceName, instance);
2987     if (LOG.isDebugEnabled()) {
2988       LOG.debug("Registered master coprocessor service: service="+serviceName);
2989     }
2990     return true;
2991   }
2992
2993   /**
2994    * Utility for constructing an instance of the passed HMaster class.
2995    * @param masterClass
2996    * @return HMaster instance.
2997    */
2998   public static HMaster constructMaster(Class<? extends HMaster> masterClass,
2999       final Configuration conf)  {
3000     try {
3001       Constructor<? extends HMaster> c = masterClass.getConstructor(Configuration.class);
3002       return c.newInstance(conf);
3003     } catch(Exception e) {
3004       Throwable error = e;
3005       if (e instanceof InvocationTargetException &&
3006           ((InvocationTargetException)e).getTargetException() != null) {
3007         error = ((InvocationTargetException)e).getTargetException();
3008       }
3009       throw new RuntimeException("Failed construction of Master: " + masterClass.toString() + ". "
3010         , error);
3011     }
3012   }
3013
3014   /**
3015    * @see org.apache.hadoop.hbase.master.HMasterCommandLine
3016    */
3017   public static void main(String [] args) {
3018     LOG.info("STARTING service " + HMaster.class.getSimpleName());
3019     VersionInfo.logVersion();
3020     new HMasterCommandLine(HMaster.class).doMain(args);
3021   }
3022
3023   public HFileCleaner getHFileCleaner() {
3024     return this.hfileCleaner;
3025   }
3026
3027   public LogCleaner getLogCleaner() {
3028     return this.logCleaner;
3029   }
3030
3031   /**
3032    * @return the underlying snapshot manager
3033    */
3034   @Override
3035   public SnapshotManager getSnapshotManager() {
3036     return this.snapshotManager;
3037   }
3038
3039   /**
3040    * @return the underlying MasterProcedureManagerHost
3041    */
3042   @Override
3043   public MasterProcedureManagerHost getMasterProcedureManagerHost() {
3044     return mpmHost;
3045   }
3046
3047   @Override
3048   public ClusterSchema getClusterSchema() {
3049     return this.clusterSchemaService;
3050   }
3051
3052   /**
3053    * Create a new Namespace.
3054    * @param namespaceDescriptor descriptor for new Namespace
3055    * @param nonceGroup Identifier for the source of the request, a client or process.
3056    * @param nonce A unique identifier for this operation from the client or process identified by
3057    * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3058    * @return procedure id
3059    */
3060   long createNamespace(final NamespaceDescriptor namespaceDescriptor, final long nonceGroup,
3061       final long nonce) throws IOException {
3062     checkInitialized();
3063
3064     TableName.isLegalNamespaceName(Bytes.toBytes(namespaceDescriptor.getName()));
3065
3066     return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
3067           nonceGroup, nonce) {
3068       @Override
3069       protected void run() throws IOException {
3070         getMaster().getMasterCoprocessorHost().preCreateNamespace(namespaceDescriptor);
3071         // We need to wait for the procedure to potentially fail due to "prepare" sanity
3072         // checks. This will block only the beginning of the procedure. See HBASE-19953.
3073         ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3074         LOG.info(getClientIdAuditPrefix() + " creating " + namespaceDescriptor);
3075         // Execute the operation synchronously - wait for the operation to complete before
3076         // continuing.
3077         setProcId(getClusterSchema().createNamespace(namespaceDescriptor, getNonceKey(), latch));
3078         latch.await();
3079         getMaster().getMasterCoprocessorHost().postCreateNamespace(namespaceDescriptor);
3080       }
3081
3082       @Override
3083       protected String getDescription() {
3084         return "CreateNamespaceProcedure";
3085       }
3086     });
3087   }
3088
3089   /**
3090    * Modify an existing Namespace.
3091    * @param nonceGroup Identifier for the source of the request, a client or process.
3092    * @param nonce A unique identifier for this operation from the client or process identified by
3093    * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3094    * @return procedure id
3095    */
3096   long modifyNamespace(final NamespaceDescriptor newNsDescriptor, final long nonceGroup,
3097       final long nonce) throws IOException {
3098     checkInitialized();
3099
3100     TableName.isLegalNamespaceName(Bytes.toBytes(newNsDescriptor.getName()));
3101
3102     return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
3103           nonceGroup, nonce) {
3104       @Override
3105       protected void run() throws IOException {
3106         NamespaceDescriptor oldNsDescriptor = getNamespace(newNsDescriptor.getName());
3107         getMaster().getMasterCoprocessorHost().preModifyNamespace(oldNsDescriptor, newNsDescriptor);
3108         // We need to wait for the procedure to potentially fail due to "prepare" sanity
3109         // checks. This will block only the beginning of the procedure. See HBASE-19953.
3110         ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3111         LOG.info(getClientIdAuditPrefix() + " modify " + newNsDescriptor);
3112         // Execute the operation synchronously - wait for the operation to complete before
3113         // continuing.
3114         setProcId(getClusterSchema().modifyNamespace(newNsDescriptor, getNonceKey(), latch));
3115         latch.await();
3116         getMaster().getMasterCoprocessorHost().postModifyNamespace(oldNsDescriptor,
3117           newNsDescriptor);
3118       }
3119
3120       @Override
3121       protected String getDescription() {
3122         return "ModifyNamespaceProcedure";
3123       }
3124     });
3125   }
3126
3127   /**
3128    * Delete an existing Namespace. Only empty Namespaces (no tables) can be removed.
3129    * @param nonceGroup Identifier for the source of the request, a client or process.
3130    * @param nonce A unique identifier for this operation from the client or process identified by
3131    * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3132    * @return procedure id
3133    */
3134   long deleteNamespace(final String name, final long nonceGroup, final long nonce)
3135       throws IOException {
3136     checkInitialized();
3137
3138     return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
3139           nonceGroup, nonce) {
3140       @Override
3141       protected void run() throws IOException {
3142         getMaster().getMasterCoprocessorHost().preDeleteNamespace(name);
3143         LOG.info(getClientIdAuditPrefix() + " delete " + name);
3144         // Execute the operation synchronously - wait for the operation to complete before
3145         // continuing.
3146         //
3147         // We need to wait for the procedure to potentially fail due to "prepare" sanity
3148         // checks. This will block only the beginning of the procedure. See HBASE-19953.
3149         ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3150         setProcId(submitProcedure(
3151               new DeleteNamespaceProcedure(procedureExecutor.getEnvironment(), name, latch)));
3152         latch.await();
3153         // Will not be invoked in the face of Exception thrown by the Procedure's execution
3154         getMaster().getMasterCoprocessorHost().postDeleteNamespace(name);
3155       }
3156
3157       @Override
3158       protected String getDescription() {
3159         return "DeleteNamespaceProcedure";
3160       }
3161     });
3162   }
3163
3164   /**
3165    * Get a Namespace
3166    * @param name Name of the Namespace
3167    * @return Namespace descriptor for <code>name</code>
3168    */
3169   NamespaceDescriptor getNamespace(String name) throws IOException {
3170     checkInitialized();
3171     if (this.cpHost != null) this.cpHost.preGetNamespaceDescriptor(name);
3172     NamespaceDescriptor nsd = this.clusterSchemaService.getNamespace(name);
3173     if (this.cpHost != null) this.cpHost.postGetNamespaceDescriptor(nsd);
3174     return nsd;
3175   }
3176
3177   /**
3178    * Get all Namespaces
3179    * @return All Namespace descriptors
3180    */
3181   List<NamespaceDescriptor> getNamespaces() throws IOException {
3182     checkInitialized();
3183     final List<NamespaceDescriptor> nsds = new ArrayList<>();
3184     if (cpHost != null) {
3185       cpHost.preListNamespaceDescriptors(nsds);
3186     }
3187     nsds.addAll(this.clusterSchemaService.getNamespaces());
3188     if (this.cpHost != null) {
3189       this.cpHost.postListNamespaceDescriptors(nsds);
3190     }
3191     return nsds;
3192   }
3193
3194   /**
3195    * List namespace names
3196    * @return All namespace names
3197    */
3198   public List<String> listNamespaces() throws IOException {
3199     checkInitialized();
3200     List<String> namespaces = new ArrayList<>();
3201     if (cpHost != null) {
3202       cpHost.preListNamespaces(namespaces);
3203     }
3204     for (NamespaceDescriptor namespace : clusterSchemaService.getNamespaces()) {
3205       namespaces.add(namespace.getName());
3206     }
3207     if (cpHost != null) {
3208       cpHost.postListNamespaces(namespaces);
3209     }
3210     return namespaces;
3211   }
3212
3213   @Override
3214   public List<TableName> listTableNamesByNamespace(String name) throws IOException {
3215     checkInitialized();
3216     return listTableNames(name, null, true);
3217   }
3218
3219   @Override
3220   public List<TableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
3221     checkInitialized();
3222     return listTableDescriptors(name, null, null, true);
3223   }
3224
3225   @Override
3226   public boolean abortProcedure(final long procId, final boolean mayInterruptIfRunning)
3227       throws IOException {
3228     if (cpHost != null) {
3229       cpHost.preAbortProcedure(this.procedureExecutor, procId);
3230     }
3231
3232     final boolean result = this.procedureExecutor.abort(procId, mayInterruptIfRunning);
3233
3234     if (cpHost != null) {
3235       cpHost.postAbortProcedure();
3236     }
3237
3238     return result;
3239   }
3240
3241   @Override
3242   public List<Procedure<?>> getProcedures() throws IOException {
3243     if (cpHost != null) {
3244       cpHost.preGetProcedures();
3245     }
3246
3247     @SuppressWarnings({ "unchecked", "rawtypes" })
3248     List<Procedure<?>> procList = (List) this.procedureExecutor.getProcedures();
3249
3250     if (cpHost != null) {
3251       cpHost.postGetProcedures(procList);
3252     }
3253
3254     return procList;
3255   }
3256
3257   @Override
3258   public List<LockedResource> getLocks() throws IOException {
3259     if (cpHost != null) {
3260       cpHost.preGetLocks();
3261     }
3262
3263     MasterProcedureScheduler procedureScheduler =
3264       procedureExecutor.getEnvironment().getProcedureScheduler();
3265
3266     final List<LockedResource> lockedResources = procedureScheduler.getLocks();
3267
3268     if (cpHost != null) {
3269       cpHost.postGetLocks(lockedResources);
3270     }
3271
3272     return lockedResources;
3273   }
3274
3275   /**
3276    * Returns the list of table descriptors that match the specified request
3277    * @param namespace the namespace to query, or null if querying for all
3278    * @param regex The regular expression to match against, or null if querying for all
3279    * @param tableNameList the list of table names, or null if querying for all
3280    * @param includeSysTables False to match only against userspace tables
3281    * @return the list of table descriptors
3282    */
3283   public List<TableDescriptor> listTableDescriptors(final String namespace, final String regex,
3284       final List<TableName> tableNameList, final boolean includeSysTables)
3285   throws IOException {
3286     List<TableDescriptor> htds = new ArrayList<>();
3287     if (cpHost != null) {
3288       cpHost.preGetTableDescriptors(tableNameList, htds, regex);
3289     }
3290     htds = getTableDescriptors(htds, namespace, regex, tableNameList, includeSysTables);
3291     if (cpHost != null) {
3292       cpHost.postGetTableDescriptors(tableNameList, htds, regex);
3293     }
3294     return htds;
3295   }
3296
3297   /**
3298    * Returns the list of table names that match the specified request
3299    * @param regex The regular expression to match against, or null if querying for all
3300    * @param namespace the namespace to query, or null if querying for all
3301    * @param includeSysTables False to match only against userspace tables
3302    * @return the list of table names
3303    */
3304   public List<TableName> listTableNames(final String namespace, final String regex,
3305       final boolean includeSysTables) throws IOException {
3306     List<TableDescriptor> htds = new ArrayList<>();
3307     if (cpHost != null) {
3308       cpHost.preGetTableNames(htds, regex);
3309     }
3310     htds = getTableDescriptors(htds, namespace, regex, null, includeSysTables);
3311     if (cpHost != null) {
3312       cpHost.postGetTableNames(htds, regex);
3313     }
3314     List<TableName> result = new ArrayList<>(htds.size());
3315     for (TableDescriptor htd: htds) result.add(htd.getTableName());
3316     return result;
3317   }
3318
3319   /**
3320    * @return list of table table descriptors after filtering by regex and whether to include system
3321    *    tables, etc.
3322    * @throws IOException
3323    */
3324   private List<TableDescriptor> getTableDescriptors(final List<TableDescriptor> htds,
3325       final String namespace, final String regex, final List<TableName> tableNameList,
3326       final boolean includeSysTables)
3327   throws IOException {
3328     if (tableNameList == null || tableNameList.isEmpty()) {
3329       // request for all TableDescriptors
3330       Collection<TableDescriptor> allHtds;
3331       if (namespace != null && namespace.length() > 0) {
3332         // Do a check on the namespace existence. Will fail if does not exist.
3333         this.clusterSchemaService.getNamespace(namespace);
3334         allHtds = tableDescriptors.getByNamespace(namespace).values();
3335       } else {
3336         allHtds = tableDescriptors.getAll().values();
3337       }
3338       for (TableDescriptor desc: allHtds) {
3339         if (tableStateManager.isTablePresent(desc.getTableName())
3340             && (includeSysTables || !desc.getTableName().isSystemTable())) {
3341           htds.add(desc);
3342         }
3343       }
3344     } else {
3345       for (TableName s: tableNameList) {
3346         if (tableStateManager.isTablePresent(s)) {
3347           TableDescriptor desc = tableDescriptors.get(s);
3348           if (desc != null) {
3349             htds.add(desc);
3350           }
3351         }
3352       }
3353     }
3354
3355     // Retains only those matched by regular expression.
3356     if (regex != null) filterTablesByRegex(htds, Pattern.compile(regex));
3357     return htds;
3358   }
3359
3360   /**
3361    * Removes the table descriptors that don't match the pattern.
3362    * @param descriptors list of table descriptors to filter
3363    * @param pattern the regex to use
3364    */
3365   private static void filterTablesByRegex(final Collection<TableDescriptor> descriptors,
3366       final Pattern pattern) {
3367     final String defaultNS = NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR;
3368     Iterator<TableDescriptor> itr = descriptors.iterator();
3369     while (itr.hasNext()) {
3370       TableDescriptor htd = itr.next();
3371       String tableName = htd.getTableName().getNameAsString();
3372       boolean matched = pattern.matcher(tableName).matches();
3373       if (!matched && htd.getTableName().getNamespaceAsString().equals(defaultNS)) {
3374         matched = pattern.matcher(defaultNS + TableName.NAMESPACE_DELIM + tableName).matches();
3375       }
3376       if (!matched) {
3377         itr.remove();
3378       }
3379     }
3380   }
3381
3382   @Override
3383   public long getLastMajorCompactionTimestamp(TableName table) throws IOException {
3384     return getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
3385         .getLastMajorCompactionTimestamp(table);
3386   }
3387
3388   @Override
3389   public long getLastMajorCompactionTimestampForRegion(byte[] regionName) throws IOException {
3390     return getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
3391         .getLastMajorCompactionTimestamp(regionName);
3392   }
3393
3394   /**
3395    * Gets the mob file compaction state for a specific table.
3396    * Whether all the mob files are selected is known during the compaction execution, but
3397    * the statistic is done just before compaction starts, it is hard to know the compaction
3398    * type at that time, so the rough statistics are chosen for the mob file compaction. Only two
3399    * compaction states are available, CompactionState.MAJOR_AND_MINOR and CompactionState.NONE.
3400    * @param tableName The current table name.
3401    * @return If a given table is in mob file compaction now.
3402    */
3403   public CompactionState getMobCompactionState(TableName tableName) {
3404     AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3405     if (compactionsCount != null && compactionsCount.get() != 0) {
3406       return CompactionState.MAJOR_AND_MINOR;
3407     }
3408     return CompactionState.NONE;
3409   }
3410
3411   public void reportMobCompactionStart(TableName tableName) throws IOException {
3412     IdLock.Entry lockEntry = null;
3413     try {
3414       lockEntry = mobCompactionLock.getLockEntry(tableName.hashCode());
3415       AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3416       if (compactionsCount == null) {
3417         compactionsCount = new AtomicInteger(0);
3418         mobCompactionStates.put(tableName, compactionsCount);
3419       }
3420       compactionsCount.incrementAndGet();
3421     } finally {
3422       if (lockEntry != null) {
3423         mobCompactionLock.releaseLockEntry(lockEntry);
3424       }
3425     }
3426   }
3427
3428   public void reportMobCompactionEnd(TableName tableName) throws IOException {
3429     IdLock.Entry lockEntry = null;
3430     try {
3431       lockEntry = mobCompactionLock.getLockEntry(tableName.hashCode());
3432       AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3433       if (compactionsCount != null) {
3434         int count = compactionsCount.decrementAndGet();
3435         // remove the entry if the count is 0.
3436         if (count == 0) {
3437           mobCompactionStates.remove(tableName);
3438         }
3439       }
3440     } finally {
3441       if (lockEntry != null) {
3442         mobCompactionLock.releaseLockEntry(lockEntry);
3443       }
3444     }
3445   }
3446
3447   /**
3448    * Requests mob compaction.
3449    * @param tableName The table the compact.
3450    * @param columns The compacted columns.
3451    * @param allFiles Whether add all mob files into the compaction.
3452    */
3453   public void requestMobCompaction(TableName tableName,
3454                                    List<ColumnFamilyDescriptor> columns, boolean allFiles) throws IOException {
3455     mobCompactThread.requestMobCompaction(conf, getFileSystem(), tableName, columns, allFiles);
3456   }
3457
3458   /**
3459    * Queries the state of the {@link LoadBalancerTracker}. If the balancer is not initialized,
3460    * false is returned.
3461    *
3462    * @return The state of the load balancer, or false if the load balancer isn't defined.
3463    */
3464   public boolean isBalancerOn() {
3465     return !isInMaintenanceMode()
3466         && loadBalancerTracker != null
3467         && loadBalancerTracker.isBalancerOn();
3468   }
3469
3470   /**
3471    * Queries the state of the {@link RegionNormalizerTracker}. If it's not initialized,
3472    * false is returned.
3473    */
3474   public boolean isNormalizerOn() {
3475     return !isInMaintenanceMode()
3476         && regionNormalizerTracker != null
3477         && regionNormalizerTracker.isNormalizerOn();
3478   }
3479
3480   /**
3481    * Queries the state of the {@link SplitOrMergeTracker}. If it is not initialized,
3482    * false is returned. If switchType is illegal, false will return.
3483    * @param switchType see {@link org.apache.hadoop.hbase.client.MasterSwitchType}
3484    * @return The state of the switch
3485    */
3486   @Override
3487   public boolean isSplitOrMergeEnabled(MasterSwitchType switchType) {
3488     return !isInMaintenanceMode()
3489         && splitOrMergeTracker != null
3490         && splitOrMergeTracker.isSplitOrMergeEnabled(switchType);
3491   }
3492
3493   /**
3494    * Fetch the configured {@link LoadBalancer} class name. If none is set, a default is returned.
3495    *
3496    * @return The name of the {@link LoadBalancer} in use.
3497    */
3498   public String getLoadBalancerClassName() {
3499     return conf.get(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, LoadBalancerFactory
3500         .getDefaultLoadBalancerClass().getName());
3501   }
3502
3503   /**
3504    * @return RegionNormalizerTracker instance
3505    */
3506   public RegionNormalizerTracker getRegionNormalizerTracker() {
3507     return regionNormalizerTracker;
3508   }
3509
3510   public SplitOrMergeTracker getSplitOrMergeTracker() {
3511     return splitOrMergeTracker;
3512   }
3513
3514   @Override
3515   public LoadBalancer getLoadBalancer() {
3516     return balancer;
3517   }
3518
3519   @Override
3520   public FavoredNodesManager getFavoredNodesManager() {
3521     return favoredNodesManager;
3522   }
3523
3524   private long executePeerProcedure(AbstractPeerProcedure<?> procedure) throws IOException {
3525     long procId = procedureExecutor.submitProcedure(procedure);
3526     procedure.getLatch().await();
3527     return procId;
3528   }
3529
3530   @Override
3531   public long addReplicationPeer(String peerId, ReplicationPeerConfig peerConfig, boolean enabled)
3532       throws ReplicationException, IOException {
3533     LOG.info(getClientIdAuditPrefix() + " creating replication peer, id=" + peerId + ", config=" +
3534       peerConfig + ", state=" + (enabled ? "ENABLED" : "DISABLED"));
3535     return executePeerProcedure(new AddPeerProcedure(peerId, peerConfig, enabled));
3536   }
3537
3538   @Override
3539   public long removeReplicationPeer(String peerId) throws ReplicationException, IOException {
3540     LOG.info(getClientIdAuditPrefix() + " removing replication peer, id=" + peerId);
3541     return executePeerProcedure(new RemovePeerProcedure(peerId));
3542   }
3543
3544   @Override
3545   public long enableReplicationPeer(String peerId) throws ReplicationException, IOException {
3546     LOG.info(getClientIdAuditPrefix() + " enable replication peer, id=" + peerId);
3547     return executePeerProcedure(new EnablePeerProcedure(peerId));
3548   }
3549
3550   @Override
3551   public long disableReplicationPeer(String peerId) throws ReplicationException, IOException {
3552     LOG.info(getClientIdAuditPrefix() + " disable replication peer, id=" + peerId);
3553     return executePeerProcedure(new DisablePeerProcedure(peerId));
3554   }
3555
3556   @Override
3557   public ReplicationPeerConfig getReplicationPeerConfig(String peerId)
3558       throws ReplicationException, IOException {
3559     if (cpHost != null) {
3560       cpHost.preGetReplicationPeerConfig(peerId);
3561     }
3562     LOG.info(getClientIdAuditPrefix() + " get replication peer config, id=" + peerId);
3563     ReplicationPeerConfig peerConfig = this.replicationPeerManager.getPeerConfig(peerId)
3564         .orElseThrow(() -> new ReplicationPeerNotFoundException(peerId));
3565     if (cpHost != null) {
3566       cpHost.postGetReplicationPeerConfig(peerId);
3567     }
3568     return peerConfig;
3569   }
3570
3571   @Override
3572   public long updateReplicationPeerConfig(String peerId, ReplicationPeerConfig peerConfig)
3573       throws ReplicationException, IOException {
3574     LOG.info(getClientIdAuditPrefix() + " update replication peer config, id=" + peerId +
3575       ", config=" + peerConfig);
3576     return executePeerProcedure(new UpdatePeerConfigProcedure(peerId, peerConfig));
3577   }
3578
3579   @Override
3580   public List<ReplicationPeerDescription> listReplicationPeers(String regex)
3581       throws ReplicationException, IOException {
3582     if (cpHost != null) {
3583       cpHost.preListReplicationPeers(regex);
3584     }
3585     LOG.debug("{} list replication peers, regex={}", getClientIdAuditPrefix(), regex);
3586     Pattern pattern = regex == null ? null : Pattern.compile(regex);
3587     List<ReplicationPeerDescription> peers =
3588       this.replicationPeerManager.listPeers(pattern);
3589     if (cpHost != null) {
3590       cpHost.postListReplicationPeers(regex);
3591     }
3592     return peers;
3593   }
3594
3595   @Override
3596   public long transitReplicationPeerSyncReplicationState(String peerId, SyncReplicationState state)
3597     throws ReplicationException, IOException {
3598     LOG.info(
3599       getClientIdAuditPrefix() +
3600         " transit current cluster state to {} in a synchronous replication peer id={}",
3601       state, peerId);
3602     return executePeerProcedure(new TransitPeerSyncReplicationStateProcedure(peerId, state));
3603   }
3604
3605   /**
3606    * Mark region server(s) as decommissioned (previously called 'draining') to prevent additional
3607    * regions from getting assigned to them. Also unload the regions on the servers asynchronously.0
3608    * @param servers Region servers to decommission.
3609    */
3610   public void decommissionRegionServers(final List<ServerName> servers, final boolean offload)
3611       throws HBaseIOException {
3612     List<ServerName> serversAdded = new ArrayList<>(servers.size());
3613     // Place the decommission marker first.
3614     String parentZnode = getZooKeeper().getZNodePaths().drainingZNode;
3615     for (ServerName server : servers) {
3616       try {
3617         String node = ZNodePaths.joinZNode(parentZnode, server.getServerName());
3618         ZKUtil.createAndFailSilent(getZooKeeper(), node);
3619       } catch (KeeperException ke) {
3620         throw new HBaseIOException(
3621           this.zooKeeper.prefix("Unable to decommission '" + server.getServerName() + "'."), ke);
3622       }
3623       if (this.serverManager.addServerToDrainList(server)) {
3624         serversAdded.add(server);
3625       }
3626     }
3627     // Move the regions off the decommissioned servers.
3628     if (offload) {
3629       final List<ServerName> destServers = this.serverManager.createDestinationServersList();
3630       for (ServerName server : serversAdded) {
3631         final List<RegionInfo> regionsOnServer = this.assignmentManager.getRegionsOnServer(server);
3632         for (RegionInfo hri : regionsOnServer) {
3633           ServerName dest = balancer.randomAssignment(hri, destServers);
3634           if (dest == null) {
3635             throw new HBaseIOException("Unable to determine a plan to move " + hri);
3636           }
3637           RegionPlan rp = new RegionPlan(hri, server, dest);
3638           this.assignmentManager.moveAsync(rp);
3639         }
3640       }
3641     }
3642   }
3643
3644   /**
3645    * List region servers marked as decommissioned (previously called 'draining') to not get regions
3646    * assigned to them.
3647    * @return List of decommissioned servers.
3648    */
3649   public List<ServerName> listDecommissionedRegionServers() {
3650     return this.serverManager.getDrainingServersList();
3651   }
3652
3653   /**
3654    * Remove decommission marker (previously called 'draining') from a region server to allow regions
3655    * assignments. Load regions onto the server asynchronously if a list of regions is given
3656    * @param server Region server to remove decommission marker from.
3657    */
3658   public void recommissionRegionServer(final ServerName server,
3659       final List<byte[]> encodedRegionNames) throws IOException {
3660     // Remove the server from decommissioned (draining) server list.
3661     String parentZnode = getZooKeeper().getZNodePaths().drainingZNode;
3662     String node = ZNodePaths.joinZNode(parentZnode, server.getServerName());
3663     try {
3664       ZKUtil.deleteNodeFailSilent(getZooKeeper(), node);
3665     } catch (KeeperException ke) {
3666       throw new HBaseIOException(
3667         this.zooKeeper.prefix("Unable to recommission '" + server.getServerName() + "'."), ke);
3668     }
3669     this.serverManager.removeServerFromDrainList(server);
3670
3671     // Load the regions onto the server if we are given a list of regions.
3672     if (encodedRegionNames == null || encodedRegionNames.isEmpty()) {
3673       return;
3674     }
3675     if (!this.serverManager.isServerOnline(server)) {
3676       return;
3677     }
3678     for (byte[] encodedRegionName : encodedRegionNames) {
3679       RegionState regionState =
3680         assignmentManager.getRegionStates().getRegionState(Bytes.toString(encodedRegionName));
3681       if (regionState == null) {
3682         LOG.warn("Unknown region " + Bytes.toStringBinary(encodedRegionName));
3683         continue;
3684       }
3685       RegionInfo hri = regionState.getRegion();
3686       if (server.equals(regionState.getServerName())) {
3687         LOG.info("Skipping move of region " + hri.getRegionNameAsString() +
3688           " because region already assigned to the same server " + server + ".");
3689         continue;
3690       }
3691       RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), server);
3692       this.assignmentManager.moveAsync(rp);
3693     }
3694   }
3695
3696   @Override
3697   public LockManager getLockManager() {
3698     return lockManager;
3699   }
3700
3701   public QuotaObserverChore getQuotaObserverChore() {
3702     return this.quotaObserverChore;
3703   }
3704
3705   public SpaceQuotaSnapshotNotifier getSpaceQuotaSnapshotNotifier() {
3706     return this.spaceQuotaSnapshotNotifier;
3707   }
3708
3709   @SuppressWarnings("unchecked")
3710   private RemoteProcedure<MasterProcedureEnv, ?> getRemoteProcedure(long procId) {
3711     Procedure<?> procedure = procedureExecutor.getProcedure(procId);
3712     if (procedure == null) {
3713       return null;
3714     }
3715     assert procedure instanceof RemoteProcedure;
3716     return (RemoteProcedure<MasterProcedureEnv, ?>) procedure;
3717   }
3718
3719   public void remoteProcedureCompleted(long procId) {
3720     LOG.debug("Remote procedure done, pid={}", procId);
3721     RemoteProcedure<MasterProcedureEnv, ?> procedure = getRemoteProcedure(procId);
3722     if (procedure != null) {
3723       procedure.remoteOperationCompleted(procedureExecutor.getEnvironment());
3724     }
3725   }
3726
3727   public void remoteProcedureFailed(long procId, RemoteProcedureException error) {
3728     LOG.debug("Remote procedure failed, pid={}", procId, error);
3729     RemoteProcedure<MasterProcedureEnv, ?> procedure = getRemoteProcedure(procId);
3730     if (procedure != null) {
3731       procedure.remoteOperationFailed(procedureExecutor.getEnvironment(), error);
3732     }
3733   }
3734
3735   /**
3736    * Reopen regions provided in the argument
3737    *
3738    * @param tableName The current table name
3739    * @param regionNames The region names of the regions to reopen
3740    * @param nonceGroup Identifier for the source of the request, a client or process
3741    * @param nonce A unique identifier for this operation from the client or process identified by
3742    *   <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3743    * @return procedure Id
3744    * @throws IOException if reopening region fails while running procedure
3745    */
3746   long reopenRegions(final TableName tableName, final List<byte[]> regionNames,
3747       final long nonceGroup, final long nonce)
3748       throws IOException {
3749
3750     return MasterProcedureUtil
3751       .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
3752
3753         @Override
3754         protected void run() throws IOException {
3755           submitProcedure(new ReopenTableRegionsProcedure(tableName, regionNames));
3756         }
3757
3758         @Override
3759         protected String getDescription() {
3760           return "ReopenTableRegionsProcedure";
3761         }
3762
3763       });
3764
3765   }
3766
3767   @Override
3768   public ReplicationPeerManager getReplicationPeerManager() {
3769     return replicationPeerManager;
3770   }
3771
3772   public HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>>
3773       getReplicationLoad(ServerName[] serverNames) {
3774     List<ReplicationPeerDescription> peerList = this.getReplicationPeerManager().listPeers(null);
3775     if (peerList == null) {
3776       return null;
3777     }
3778     HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>> replicationLoadSourceMap =
3779         new HashMap<>(peerList.size());
3780     peerList.stream()
3781         .forEach(peer -> replicationLoadSourceMap.put(peer.getPeerId(), new ArrayList<>()));
3782     for (ServerName serverName : serverNames) {
3783       List<ReplicationLoadSource> replicationLoadSources =
3784           getServerManager().getLoad(serverName).getReplicationLoadSourceList();
3785       for (ReplicationLoadSource replicationLoadSource : replicationLoadSources) {
3786         replicationLoadSourceMap.get(replicationLoadSource.getPeerID())
3787             .add(new Pair<>(serverName, replicationLoadSource));
3788       }
3789     }
3790     for (List<Pair<ServerName, ReplicationLoadSource>> loads : replicationLoadSourceMap.values()) {
3791       if (loads.size() > 0) {
3792         loads.sort(Comparator.comparingLong(load -> (-1) * load.getSecond().getReplicationLag()));
3793       }
3794     }
3795     return replicationLoadSourceMap;
3796   }
3797
3798   /**
3799    * This method modifies the master's configuration in order to inject replication-related features
3800    */
3801   @VisibleForTesting
3802   public static void decorateMasterConfiguration(Configuration conf) {
3803     String plugins = conf.get(HBASE_MASTER_LOGCLEANER_PLUGINS);
3804     String cleanerClass = ReplicationLogCleaner.class.getCanonicalName();
3805     if (!plugins.contains(cleanerClass)) {
3806       conf.set(HBASE_MASTER_LOGCLEANER_PLUGINS, plugins + "," + cleanerClass);
3807     }
3808     if (ReplicationUtils.isReplicationForBulkLoadDataEnabled(conf)) {
3809       plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
3810       cleanerClass = ReplicationHFileCleaner.class.getCanonicalName();
3811       if (!plugins.contains(cleanerClass)) {
3812         conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, plugins + "," + cleanerClass);
3813       }
3814     }
3815   }
3816
3817   public SnapshotQuotaObserverChore getSnapshotQuotaObserverChore() {
3818     return this.snapshotQuotaChore;
3819   }
3820
3821   @Override
3822   public SyncReplicationReplayWALManager getSyncReplicationReplayWALManager() {
3823     return this.syncReplicationReplayWALManager;
3824   }
3825
3826   @Override
3827   public Map<String, ReplicationStatus> getWalGroupsReplicationStatus() {
3828     if (!this.isOnline() || !LoadBalancer.isMasterCanHostUserRegions(conf)) {
3829       return new HashMap<>();
3830     }
3831     return super.getWalGroupsReplicationStatus();
3832   }
3833
3834   public HbckChore getHbckChore() {
3835     return this.hbckChore;
3836   }
3837
3838   @Override
3839   public String getClusterId() {
3840     if (activeMaster) {
3841       return super.getClusterId();
3842     }
3843     return cachedClusterId.getFromCacheOrFetch();
3844   }
3845
3846   @Override
3847   public void runReplicationBarrierCleaner() {
3848     ReplicationBarrierCleaner rbc = this.replicationBarrierCleaner;
3849     if (rbc != null) {
3850       rbc.chore();
3851     }
3852   }
3853 }