/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org
.apache
.hadoop
.hbase
.master
;
20 import java
.io
.IOException
;
21 import java
.util
.HashMap
;
22 import java
.util
.LinkedList
;
23 import java
.util
.List
;
26 import java
.util
.concurrent
.locks
.ReentrantReadWriteLock
;
28 import org
.apache
.hadoop
.fs
.FileStatus
;
29 import org
.apache
.hadoop
.fs
.FileSystem
;
30 import org
.apache
.hadoop
.fs
.Path
;
31 import org
.apache
.hadoop
.hbase
.ScheduledChore
;
32 import org
.apache
.hadoop
.hbase
.ServerName
;
33 import org
.apache
.hadoop
.hbase
.client
.RegionInfo
;
34 import org
.apache
.hadoop
.hbase
.util
.EnvironmentEdgeManager
;
35 import org
.apache
.hadoop
.hbase
.util
.FSUtils
;
36 import org
.apache
.hadoop
.hbase
.util
.HbckRegionInfo
;
37 import org
.apache
.hadoop
.hbase
.util
.Pair
;
38 import org
.apache
.yetus
.audience
.InterfaceAudience
;
39 import org
.apache
.yetus
.audience
.InterfaceStability
;
40 import org
.slf4j
.Logger
;
41 import org
.slf4j
.LoggerFactory
;
43 import org
.apache
.hbase
.thirdparty
.com
.google
.common
.collect
.Lists
;
/**
 * Used to do the hbck checking job at master side.
 */
48 @InterfaceAudience.Private
49 @InterfaceStability.Evolving
50 public class HbckChecker
extends ScheduledChore
{
51 private static final Logger LOG
= LoggerFactory
.getLogger(HbckChecker
.class.getName());
53 private static final String HBCK_CHECKER_INTERVAL
= "hbase.master.hbck.checker.interval";
54 private static final int DEFAULT_HBCK_CHECKER_INTERVAL
= 60 * 60 * 1000;
56 private final MasterServices master
;
59 * This map contains the state of all hbck items. It maps from encoded region
60 * name to HbckRegionInfo structure. The information contained in HbckRegionInfo is used
61 * to detect and correct consistency (hdfs/meta/deployment) problems.
63 private final Map
<String
, HbckRegionInfo
> regionInfoMap
= new HashMap
<>();
66 * The regions only opened on RegionServers, but no region info in meta.
68 private final Map
<String
, ServerName
> orphanRegionsOnRS
= new HashMap
<>();
70 * The regions have directory on FileSystem, but no region info in meta.
72 private final List
<String
> orphanRegionsOnFS
= new LinkedList
<>();
74 * The inconsistent regions. There are three case:
75 * case 1. Master thought this region opened, but no regionserver reported it.
76 * case 2. Master thought this region opened on Server1, but regionserver reported Server2
77 * case 3. More than one regionservers reported opened this region
79 private final Map
<String
, Pair
<ServerName
, List
<ServerName
>>> inconsistentRegions
=
83 * The "snapshot" is used to save the last round's HBCK checking report.
85 private final Map
<String
, ServerName
> orphanRegionsOnRSSnapshot
= new HashMap
<>();
86 private final List
<String
> orphanRegionsOnFSSnapshot
= new LinkedList
<>();
87 private final Map
<String
, Pair
<ServerName
, List
<ServerName
>>> inconsistentRegionsSnapshot
=
91 * The "snapshot" may be changed after checking. And this checking report "snapshot" may be
92 * accessed by web ui. Use this rwLock to synchronize.
94 ReentrantReadWriteLock rwLock
= new ReentrantReadWriteLock();
97 * When running, the "snapshot" may be changed when this round's checking finish.
99 private volatile boolean running
= false;
100 private volatile long checkingStartTimestamp
= 0;
101 private volatile long checkingEndTimestamp
= 0;
/**
 * Creates the checker chore. The chore period is read from the master configuration under
 * {@code hbase.master.hbck.checker.interval}, falling back to the built-in default.
 *
 * @param master the master services; also passed to the parent chore as its second argument
 */
public HbckChecker(MasterServices master) {
  super(
    "HbckChecker-",
    master,
    master.getConfiguration().getInt(HBCK_CHECKER_INTERVAL, DEFAULT_HBCK_CHECKER_INTERVAL));
  this.master = master;
}
110 protected void chore() {
112 regionInfoMap
.clear();
113 orphanRegionsOnRS
.clear();
114 orphanRegionsOnFS
.clear();
115 inconsistentRegions
.clear();
116 checkingStartTimestamp
= EnvironmentEdgeManager
.currentTime();
117 loadRegionsFromInMemoryState();
118 loadRegionsFromRSReport();
121 } catch (IOException e
) {
122 LOG
.warn("Failed to load the regions from filesystem", e
);
124 saveCheckResultToSnapshot();
128 private void saveCheckResultToSnapshot() {
129 // Need synchronized here, as this "snapshot" may be access by web ui.
130 rwLock
.writeLock().lock();
132 orphanRegionsOnRSSnapshot
.clear();
133 orphanRegionsOnRS
.entrySet()
134 .forEach(e
-> orphanRegionsOnRSSnapshot
.put(e
.getKey(), e
.getValue()));
135 orphanRegionsOnFSSnapshot
.clear();
136 orphanRegionsOnFSSnapshot
.addAll(orphanRegionsOnFS
);
137 inconsistentRegionsSnapshot
.clear();
138 inconsistentRegions
.entrySet()
139 .forEach(e
-> inconsistentRegionsSnapshot
.put(e
.getKey(), e
.getValue()));
140 checkingEndTimestamp
= EnvironmentEdgeManager
.currentTime();
142 rwLock
.writeLock().unlock();
146 private void loadRegionsFromInMemoryState() {
147 List
<RegionState
> regionStates
=
148 master
.getAssignmentManager().getRegionStates().getRegionStates();
149 for (RegionState regionState
: regionStates
) {
150 RegionInfo regionInfo
= regionState
.getRegion();
151 HbckRegionInfo
.MetaEntry metaEntry
=
152 new HbckRegionInfo
.MetaEntry(regionInfo
, regionState
.getServerName(),
153 regionState
.getStamp());
154 regionInfoMap
.put(regionInfo
.getEncodedName(), new HbckRegionInfo(metaEntry
));
158 private void loadRegionsFromRSReport() {
159 Map
<ServerName
, Set
<byte[]>> rsReports
= master
.getAssignmentManager().getRSReports();
160 for (Map
.Entry
<ServerName
, Set
<byte[]>> entry
: rsReports
.entrySet()) {
161 ServerName serverName
= entry
.getKey();
162 for (byte[] regionName
: entry
.getValue()) {
163 String encodedRegionName
= RegionInfo
.encodeRegionName(regionName
);
164 HbckRegionInfo hri
= regionInfoMap
.get(encodedRegionName
);
166 orphanRegionsOnRS
.put(encodedRegionName
, serverName
);
169 hri
.addServer(hri
.getMetaEntry(), serverName
);
173 for (Map
.Entry
<String
, HbckRegionInfo
> entry
: regionInfoMap
.entrySet()) {
174 String encodedRegionName
= entry
.getKey();
175 HbckRegionInfo hri
= entry
.getValue();
176 ServerName locationInMeta
= hri
.getMetaEntry().getRegionServer();
177 if (hri
.getDeployedOn().size() == 0) {
178 // Master thought this region opened, but no regionserver reported it.
179 inconsistentRegions
.put(encodedRegionName
, new Pair
<>(locationInMeta
, new LinkedList
<>()));
180 } else if (hri
.getDeployedOn().size() > 1) {
181 // More than one regionserver reported opened this region
182 inconsistentRegions
.put(encodedRegionName
, new Pair
<>(locationInMeta
, hri
.getDeployedOn()));
183 } else if (!hri
.getDeployedOn().get(0).equals(locationInMeta
)) {
184 // Master thought this region opened on Server1, but regionserver reported Server2
185 inconsistentRegions
.put(encodedRegionName
, new Pair
<>(locationInMeta
, hri
.getDeployedOn()));
190 private void loadRegionsFromFS() throws IOException
{
191 Path rootDir
= master
.getMasterFileSystem().getRootDir();
192 FileSystem fs
= master
.getMasterFileSystem().getFileSystem();
194 // list all tables from HDFS
195 List
<FileStatus
> tableDirs
= Lists
.newArrayList();
196 List
<Path
> paths
= FSUtils
.getTableDirs(fs
, rootDir
);
197 for (Path path
: paths
) {
198 tableDirs
.add(fs
.getFileStatus(path
));
201 for (FileStatus tableDir
: tableDirs
) {
202 FileStatus
[] regionDirs
= fs
.listStatus(tableDir
.getPath());
203 for (FileStatus regionDir
: regionDirs
) {
204 String encodedRegionName
= regionDir
.getPath().getName();
205 HbckRegionInfo hri
= regionInfoMap
.get(encodedRegionName
);
207 orphanRegionsOnFS
.add(encodedRegionName
);
210 HbckRegionInfo
.HdfsEntry hdfsEntry
=
211 new HbckRegionInfo
.HdfsEntry(regionDir
.getPath(), regionDir
.getModificationTime());
212 hri
.setHdfsEntry(hdfsEntry
);
218 * When running, the HBCK report may be changed later.
220 public boolean isRunning() {
225 * @return the regions only opened on RegionServers, but no region info in meta.
227 public Map
<String
, ServerName
> getOrphanRegionsOnRS() {
228 // Need synchronized here, as this "snapshot" may be changed after checking.
229 rwLock
.readLock().lock();
231 return this.orphanRegionsOnRSSnapshot
;
233 rwLock
.readLock().unlock();
238 * @return the regions have directory on FileSystem, but no region info in meta.
240 public List
<String
> getOrphanRegionsOnFS() {
241 // Need synchronized here, as this "snapshot" may be changed after checking.
242 rwLock
.readLock().lock();
244 return this.orphanRegionsOnFSSnapshot
;
246 rwLock
.readLock().unlock();
251 * Found the inconsistent regions. There are three case:
252 * case 1. Master thought this region opened, but no regionserver reported it.
253 * case 2. Master thought this region opened on Server1, but regionserver reported Server2
254 * case 3. More than one regionservers reported opened this region
256 * @return the map of inconsistent regions. Key is the region name. Value is a pair of location in
257 * meta and the regionservers which reported opened this region.
259 public Map
<String
, Pair
<ServerName
, List
<ServerName
>>> getInconsistentRegions() {
260 // Need synchronized here, as this "snapshot" may be changed after checking.
261 rwLock
.readLock().lock();
263 return this.inconsistentRegionsSnapshot
;
265 rwLock
.readLock().unlock();
270 * Used for web ui to show when the HBCK checking started.
272 public long getCheckingStartTimestamp() {
273 return this.checkingStartTimestamp
;
277 * Used for web ui to show when the HBCK checking report generated.
279 public long getCheckingEndTimestamp() {
280 return this.checkingStartTimestamp
;