/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org
.apache
.hadoop
.hbase
.util
;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.*;
import static org.junit.Assert.*;

57 @Category({MiscTests
.class, LargeTests
.class})
58 public class TestHBaseFsckReplicas
extends BaseTestHBaseFsck
{
60 public TestName name
= new TestName();
63 public static void setUpBeforeClass() throws Exception
{
64 TEST_UTIL
.getConfiguration().set(CoprocessorHost
.MASTER_COPROCESSOR_CONF_KEY
,
65 MasterSyncObserver
.class.getName());
67 conf
.setInt("hbase.regionserver.handler.count", 2);
68 conf
.setInt("hbase.regionserver.metahandler.count", 30);
70 conf
.setInt("hbase.htable.threads.max", POOL_SIZE
);
71 conf
.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE
);
72 conf
.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT
);
73 conf
.setInt(HConstants
.HBASE_RPC_TIMEOUT_KEY
, 8 * REGION_ONLINE_TIMEOUT
);
74 TEST_UTIL
.startMiniCluster(3);
76 tableExecutorService
= new ThreadPoolExecutor(1, POOL_SIZE
, 60, TimeUnit
.SECONDS
,
77 new SynchronousQueue
<>(), Threads
.newDaemonThreadFactory("testhbck"));
79 hbfsckExecutorService
= new ScheduledThreadPoolExecutor(POOL_SIZE
);
81 AssignmentManager assignmentManager
=
82 TEST_UTIL
.getHBaseCluster().getMaster().getAssignmentManager();
83 regionStates
= assignmentManager
.getRegionStates();
85 connection
= (ClusterConnection
) TEST_UTIL
.getConnection();
87 admin
= connection
.getAdmin();
88 admin
.setBalancerRunning(false, true);
90 TEST_UTIL
.waitUntilAllRegionsAssigned(TableName
.META_TABLE_NAME
);
91 TEST_UTIL
.waitUntilAllRegionsAssigned(TableName
.NAMESPACE_TABLE_NAME
);
95 public static void tearDownAfterClass() throws Exception
{
96 tableExecutorService
.shutdown();
97 hbfsckExecutorService
.shutdown();
99 TEST_UTIL
.shutdownMiniCluster();
103 public void setUp() {
104 EnvironmentEdgeManager
.reset();
108 * This creates a table with region_replica > 1 and verifies hbck runs
111 @Test(timeout
=180000)
112 public void testHbckWithRegionReplica() throws Exception
{
113 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
115 setupTableWithRegionReplica(tableName
, 2);
116 admin
.flush(tableName
);
117 assertNoErrors(doFsck(conf
, false));
119 cleanupTable(tableName
);
123 @Test (timeout
=180000)
124 public void testHbckWithFewerReplica() throws Exception
{
125 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
127 setupTableWithRegionReplica(tableName
, 2);
128 admin
.flush(tableName
);
129 assertNoErrors(doFsck(conf
, false));
130 assertEquals(ROWKEYS
.length
, countRows());
131 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"), Bytes
.toBytes("C"), true,
132 false, false, false, 1); // unassign one replica
133 // check that problem exists
134 HBaseFsck hbck
= doFsck(conf
, false);
135 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] { HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_DEPLOYED
});
137 hbck
= doFsck(conf
, true);
138 // run hbck again to make sure we don't see any errors
139 hbck
= doFsck(conf
, false);
140 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] {});
142 cleanupTable(tableName
);
146 @Test (timeout
=180000)
147 public void testHbckWithExcessReplica() throws Exception
{
148 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
150 setupTableWithRegionReplica(tableName
, 2);
151 admin
.flush(tableName
);
152 assertNoErrors(doFsck(conf
, false));
153 assertEquals(ROWKEYS
.length
, countRows());
154 // the next few lines inject a location in meta for a replica, and then
155 // asks the master to assign the replica (the meta needs to be injected
156 // for the master to treat the request for assignment as valid; the master
157 // checks the region is valid either from its memory or meta)
158 Table meta
= connection
.getTable(TableName
.META_TABLE_NAME
, tableExecutorService
);
159 List
<HRegionInfo
> regions
= admin
.getTableRegions(tableName
);
160 byte[] startKey
= Bytes
.toBytes("B");
161 byte[] endKey
= Bytes
.toBytes("C");
162 byte[] metaKey
= null;
163 HRegionInfo newHri
= null;
164 for (HRegionInfo h
: regions
) {
165 if (Bytes
.compareTo(h
.getStartKey(), startKey
) == 0 &&
166 Bytes
.compareTo(h
.getEndKey(), endKey
) == 0 &&
167 h
.getReplicaId() == HRegionInfo
.DEFAULT_REPLICA_ID
) {
168 metaKey
= h
.getRegionName();
169 //create a hri with replicaId as 2 (since we already have replicas with replicaid 0 and 1)
170 newHri
= RegionReplicaUtil
.getRegionInfoForReplica(h
, 2);
174 Put put
= new Put(metaKey
);
175 Collection
<ServerName
> var
= admin
.getClusterStatus().getServers();
176 ServerName sn
= var
.toArray(new ServerName
[var
.size()])[0];
177 //add a location with replicaId as 2 (since we already have replicas with replicaid 0 and 1)
178 MetaTableAccessor
.addLocation(put
, sn
, sn
.getStartcode(), -1, 2);
180 // assign the new replica
181 HBaseFsckRepair
.fixUnassigned(admin
, newHri
);
182 HBaseFsckRepair
.waitUntilAssigned(admin
, newHri
);
183 // now reset the meta row to its original value
184 Delete delete
= new Delete(metaKey
);
185 delete
.addColumns(HConstants
.CATALOG_FAMILY
, MetaTableAccessor
.getServerColumn(2));
186 delete
.addColumns(HConstants
.CATALOG_FAMILY
, MetaTableAccessor
.getStartCodeColumn(2));
187 delete
.addColumns(HConstants
.CATALOG_FAMILY
, MetaTableAccessor
.getSeqNumColumn(2));
190 // check that problem exists
191 HBaseFsck hbck
= doFsck(conf
, false);
192 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[]{HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_META
});
194 hbck
= doFsck(conf
, true);
195 // run hbck again to make sure we don't see any errors
196 hbck
= doFsck(conf
, false);
197 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[]{});
199 cleanupTable(tableName
);
204 * This creates and fixes a bad table with a region that is in meta but has
205 * no deployment or data hdfs. The table has region_replication set to 2.
207 @Test (timeout
=180000)
208 public void testNotInHdfsWithReplicas() throws Exception
{
209 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
211 HRegionInfo
[] oldHris
= new HRegionInfo
[2];
212 setupTableWithRegionReplica(tableName
, 2);
213 assertEquals(ROWKEYS
.length
, countRows());
214 NavigableMap
<HRegionInfo
, ServerName
> map
=
215 MetaTableAccessor
.allTableRegions(TEST_UTIL
.getConnection(),
218 // store the HRIs of the regions we will mess up
219 for (Map
.Entry
<HRegionInfo
, ServerName
> m
: map
.entrySet()) {
220 if (m
.getKey().getStartKey().length
> 0 &&
221 m
.getKey().getStartKey()[0] == Bytes
.toBytes("B")[0]) {
222 LOG
.debug("Initially server hosting " + m
.getKey() + " is " + m
.getValue());
223 oldHris
[i
++] = m
.getKey();
226 // make sure data in regions
227 admin
.flush(tableName
);
229 // Mess it up by leaving a hole in the hdfs data
230 deleteRegion(conf
, tbl
.getTableDescriptor(), Bytes
.toBytes("B"), Bytes
.toBytes("C"), false,
231 false, true); // don't rm meta
233 HBaseFsck hbck
= doFsck(conf
, false);
234 assertErrors(hbck
, new HBaseFsck
.ErrorReporter
.ERROR_CODE
[] { HBaseFsck
.ErrorReporter
.ERROR_CODE
.NOT_IN_HDFS
});
239 // check that hole fixed
240 assertNoErrors(doFsck(conf
, false));
241 assertEquals(ROWKEYS
.length
- 2, countRows());
243 // the following code checks whether the old primary/secondary has
244 // been unassigned and the new primary/secondary has been assigned
246 HRegionInfo
[] newHris
= new HRegionInfo
[2];
247 // get all table's regions from meta
248 map
= MetaTableAccessor
.allTableRegions(TEST_UTIL
.getConnection(), tbl
.getName());
249 // get the HRIs of the new regions (hbck created new regions for fixing the hdfs mess-up)
250 for (Map
.Entry
<HRegionInfo
, ServerName
> m
: map
.entrySet()) {
251 if (m
.getKey().getStartKey().length
> 0 &&
252 m
.getKey().getStartKey()[0] == Bytes
.toBytes("B")[0]) {
253 newHris
[i
++] = m
.getKey();
256 // get all the online regions in the regionservers
257 Collection
<ServerName
> servers
= admin
.getClusterStatus().getServers();
258 Set
<HRegionInfo
> onlineRegions
= new HashSet
<>();
259 for (ServerName s
: servers
) {
260 List
<HRegionInfo
> list
= admin
.getOnlineRegions(s
);
261 onlineRegions
.addAll(list
);
263 // the new HRIs must be a subset of the online regions
264 assertTrue(onlineRegions
.containsAll(Arrays
.asList(newHris
)));
265 // the old HRIs must not be part of the set (removeAll would return false if
266 // the set didn't change)
267 assertFalse(onlineRegions
.removeAll(Arrays
.asList(oldHris
)));
269 cleanupTable(tableName
);