From 4e09e4f88f0f3360ce89caa8fcb3fab63389e75c Mon Sep 17 00:00:00 2001 From: linkaline Date: Mon, 2 Sep 2019 17:15:42 +0800 Subject: [PATCH] HBASE-22642 Make move operations of RSGroup idempotent (#387) Signed-off-by: Guanghao Zhang --- .../hadoop/hbase/rsgroup/RSGroupAdminServer.java | 19 +- .../hadoop/hbase/rsgroup/TestRSGroupsAdmin2.java | 223 +++++++++++++++------ 2 files changed, 167 insertions(+), 75 deletions(-) diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java index 0654b877e0..f3ef4fb96d 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java @@ -177,10 +177,7 @@ public class RSGroupAdminServer implements RSGroupAdmin { + " does not exist."); } RSGroupInfo srcGrp = new RSGroupInfo(tmpSrcGrp); - if (srcGrp.getName().equals(targetGroupName)) { - throw new ConstraintException("Target RSGroup " + targetGroupName + - " is same as source " + srcGrp.getName() + " RSGroup."); - } + // Only move online servers checkOnlineServersOnly(servers); @@ -351,10 +348,6 @@ public class RSGroupAdminServer implements RSGroupAdmin { throw new ConstraintException("Source RSGroup for server " + firstServer + " does not exist."); } - if (srcGrp.getName().equals(targetGroupName)) { - throw new ConstraintException("Target RSGroup " + targetGroupName + - " is same as source " + srcGrp + " RSGroup."); - } // Only move online servers (when moving from 'default') or servers from other // groups. This prevents bogus servers from entering groups if (RSGroupInfo.DEFAULT_GROUP.equals(srcGrp.getName())) { @@ -406,16 +399,6 @@ public class RSGroupAdminServer implements RSGroupAdmin { throw new ConstraintException("Target RSGroup must have at least one server."); } } - - for (TableName table : tables) { - String srcGroup = rsGroupInfoManager.getRSGroupOfTable(table); - if(srcGroup != null && srcGroup.equals(targetGroup)) { - throw new ConstraintException( - "Source RSGroup " + srcGroup + " is same as target " + targetGroup + - " RSGroup for table " + table); - } - LOG.info("Moving table {} to RSGroup {}", table.getNameAsString(), targetGroup); - } rsGroupInfoManager.moveTables(tables, targetGroup); // targetGroup is null when a table is being deleted. In this case no further diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsAdmin2.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsAdmin2.java index d9c1b10cb6..6553a85c93 100644 --- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsAdmin2.java +++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsAdmin2.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.hbase.rsgroup.RSGroupAdminServer.DEFAULT_MAX_RET import static org.apache.hadoop.hbase.util.Threads.sleep; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -30,6 +31,7 @@ import java.util.EnumSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; @@ -57,6 +59,7 @@ import org.junit.experimental.categories.Category; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.hbase.thirdparty.com.google.common.collect.Sets; @Category({ LargeTests.class }) @@ -341,17 +344,9 @@ public class TestRSGroupsAdmin2 extends TestRSGroupsBase { assertTrue(msg + " " + ex.getMessage(), ex.getMessage().contains(exp)); } - // test fail server move - try { - rsGroupAdmin.moveServersAndTables(Sets.newHashSet(targetServer.getAddress()), - Sets.newHashSet(tableName), RSGroupInfo.DEFAULT_GROUP); - fail("servers shouldn't have been successfully moved."); - } catch (IOException ex) { - String exp = "Target RSGroup " + RSGroupInfo.DEFAULT_GROUP + " is same as source " + - RSGroupInfo.DEFAULT_GROUP + " RSGroup."; - String msg = "Expected '" + exp + "' in exception message: "; - assertTrue(msg + " " + ex.getMessage(), ex.getMessage().contains(exp)); - } + // test move when src = dst + rsGroupAdmin.moveServersAndTables(Sets.newHashSet(targetServer.getAddress()), + Sets.newHashSet(tableName), RSGroupInfo.DEFAULT_GROUP); // verify default group info Assert.assertEquals(oldDefaultGroupServerSize, @@ -590,39 +585,6 @@ public class TestRSGroupsAdmin2 extends TestRSGroupsBase { }); } - @Test - public void testFailedMoveWhenMoveServer() throws Exception{ - String groupName = getGroupName(name.getMethodName()); - rsGroupAdmin.addRSGroup(groupName); - final RSGroupInfo newGroup = rsGroupAdmin.getRSGroupInfo(groupName); - Pair gotPair = createTableWithRegionSplitting(newGroup, - 10); - try{ - rsGroupAdmin.moveServers(Sets.newHashSet(gotPair.getFirst().getAddress()), - newGroup.getName()); - fail("should get IOException when retry exhausted but there still exists failed moved " - + "regions"); - }catch (IOException e){ - assertTrue(e.getMessage().contains( - gotPair.getSecond().getRegionInfo().getRegionNameAsString())); - } - } - - @Test - public void testFailedMoveWhenMoveTable() throws Exception{ - final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1); - Pair gotPair = createTableWithRegionSplitting(newGroup, - 5); - try{ - rsGroupAdmin.moveTables(Sets.newHashSet(tableName), newGroup.getName()); - fail("should get IOException when retry exhausted but there still exists failed moved " - + "regions"); - }catch (IOException e){ - assertTrue(e.getMessage().contains( - gotPair.getSecond().getRegionInfo().getRegionNameAsString())); - } - } - private Pair createTableWithRegionSplitting(RSGroupInfo rsGroupInfo, int tableRegionCount) throws Exception{ final byte[] familyNameBytes = Bytes.toBytes("f"); @@ -652,24 +614,171 @@ public class TestRSGroupsAdmin2 extends TestRSGroupsBase { RSGroupInfo newGroup) throws IOException{ // get target server to move, which should has more than one regions // randomly set a region state to SPLITTING to make move fail - Map> assignMap = getTableServerRegionMap().get(tableName); - String rregion = null; - ServerName toMoveServer = null; + return randomlySetRegionState(newGroup, RegionState.State.SPLITTING,tableName); + } + + private Pair randomlySetRegionState(RSGroupInfo groupInfo, + RegionState.State state, TableName... tableNames) throws IOException { + Preconditions.checkArgument(tableNames.length == 1 || tableNames.length == 2, + "only support one or two tables"); + Map> assignMap = getTableServerRegionMap().get(tableNames[0]); + if(tableNames.length == 2) { + Map> assignMap2 = getTableServerRegionMap().get(tableNames[1]); + assignMap2.forEach((k ,v) -> { + if(!assignMap.containsKey(k)) { + assignMap.remove(k); + } + }); + } + String toCorrectRegionName = null; + ServerName srcServer = null; for (ServerName server : assignMap.keySet()) { - rregion = assignMap.get(server).size() > 1 && - !newGroup.containsServer(server.getAddress()) ? assignMap.get(server).get(0) : null; - if (rregion != null) { - toMoveServer = server; + toCorrectRegionName = assignMap.get(server).size() >= 1 && + !groupInfo.containsServer(server.getAddress()) ? assignMap.get(server).get(0) : null; + if (toCorrectRegionName != null) { + srcServer = server; break; } } - assert toMoveServer != null; - RegionInfo ri = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager(). - getRegionInfo(Bytes.toBytesBinary(rregion)); + assert srcServer != null; + RegionInfo toCorrectRegionInfo = TEST_UTIL.getMiniHBaseCluster().getMaster() + .getAssignmentManager().getRegionInfo(Bytes.toBytesBinary(toCorrectRegionName)); RegionStateNode rsn = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates() - .getRegionStateNode(ri); - rsn.setState(RegionState.State.SPLITTING); - return new Pair<>(toMoveServer, rsn); + .getRegionStateNode(toCorrectRegionInfo); + rsn.setState(state); + return new Pair<>(srcServer, rsn); + } + + @Test + public void testFailedMoveTablesAndRepair() throws Exception{ + // This UT calls moveTables() twice to test the idempotency of it. + // The first time, movement fails because a region is made in SPLITTING state + // which will not be moved. + // The second time, the region state is OPEN and check if all + // regions on target group servers after the call. + final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1); + Iterator iterator = newGroup.getServers().iterator(); + Address newGroupServer1 = (Address) iterator.next(); + + // create table + // randomly set a region state to SPLITTING to make move abort + Pair gotPair = createTableWithRegionSplitting(newGroup, + new Random().nextInt(8) + 4); + RegionStateNode rsn = gotPair.getSecond(); + + // move table to newGroup and check regions + try { + rsGroupAdmin.moveTables(Sets.newHashSet(tableName), newGroup.getName()); + fail("should get IOException when retry exhausted but there still exists failed moved " + + "regions"); + }catch (Exception e){ + assertTrue(e.getMessage().contains( + gotPair.getSecond().getRegionInfo().getRegionNameAsString())); + } + for(RegionInfo regionInfo : master.getAssignmentManager().getAssignedRegions()){ + if (regionInfo.getTable().equals(tableName) && regionInfo.equals(rsn.getRegionInfo())) { + assertNotEquals(master.getAssignmentManager().getRegionStates() + .getRegionServerOfRegion(regionInfo).getAddress(), newGroupServer1); + } + } + + // retry move table to newGroup and check if all regions are corrected + rsn.setState(RegionState.State.OPEN); + rsGroupAdmin.moveTables(Sets.newHashSet(tableName), newGroup.getName()); + for(RegionInfo regionInfo : master.getAssignmentManager().getAssignedRegions()){ + if (regionInfo.getTable().equals(tableName)) { + assertEquals(master.getAssignmentManager().getRegionStates() + .getRegionServerOfRegion(regionInfo).getAddress(), newGroupServer1); + } + } + } + + @Test + public void testFailedMoveServersAndRepair() throws Exception{ + // This UT calls moveServers() twice to test the idempotency of it. + // The first time, movement fails because a region is made in SPLITTING state + // which will not be moved. + // The second time, the region state is OPEN and check if all + // regions on target group servers after the call. + final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1); + + // create table + // randomly set a region state to SPLITTING to make move abort + Pair gotPair = createTableWithRegionSplitting(newGroup, + new Random().nextInt(8) + 4); + RegionStateNode rsn = gotPair.getSecond(); + ServerName srcServer = rsn.getRegionLocation(); + + // move server to newGroup and check regions + try { + rsGroupAdmin.moveServers(Sets.newHashSet(srcServer.getAddress()), newGroup.getName()); + fail("should get IOException when retry exhausted but there still exists failed moved " + + "regions"); + }catch (Exception e){ + assertTrue(e.getMessage().contains( + gotPair.getSecond().getRegionInfo().getRegionNameAsString())); + } + for(RegionInfo regionInfo : master.getAssignmentManager().getAssignedRegions()){ + if (regionInfo.getTable().equals(tableName) && regionInfo.equals(rsn.getRegionInfo())) { + assertEquals(master.getAssignmentManager().getRegionStates() + .getRegionServerOfRegion(regionInfo), srcServer); + } + } + + // retry move server to newGroup and check if all regions on srcServer was moved + rsn.setState(RegionState.State.OPEN); + rsGroupAdmin.moveServers(Sets.newHashSet(srcServer.getAddress()), newGroup.getName()); + assertEquals(master.getAssignmentManager().getRegionsOnServer(srcServer).size(), 0); + } + + @Test + public void testFailedMoveServersTablesAndRepair() throws Exception{ + // This UT calls moveTablesAndServers() twice to test the idempotency of it. + // The first time, movement fails because a region is made in SPLITTING state + // which will not be moved. + // The second time, the region state is OPEN and check if all + // regions on target group servers after the call. + final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1); + // create table + final byte[] familyNameBytes = Bytes.toBytes("f"); + TableName table1 = TableName.valueOf(tableName.getNameAsString() + "_1"); + TableName table2 = TableName.valueOf(tableName.getNameAsString() + "_2"); + TEST_UTIL.createMultiRegionTable(table1, familyNameBytes, + new Random().nextInt(12) + 4); + TEST_UTIL.createMultiRegionTable(table2, familyNameBytes, + new Random().nextInt(12) + 4); + + // randomly set a region state to SPLITTING to make move abort + Pair gotPair = + randomlySetRegionState(newGroup, RegionState.State.SPLITTING, table1, table2); + RegionStateNode rsn = gotPair.getSecond(); + ServerName srcServer = rsn.getRegionLocation(); + + // move server and table to newGroup and check regions + try { + rsGroupAdmin.moveServersAndTables(Sets.newHashSet(srcServer.getAddress()), + Sets.newHashSet(table2), newGroup.getName()); + fail("should get IOException when retry exhausted but there still exists failed moved " + + "regions"); + }catch (Exception e){ + assertTrue(e.getMessage().contains( + gotPair.getSecond().getRegionInfo().getRegionNameAsString())); + } + for(RegionInfo regionInfo : master.getAssignmentManager().getAssignedRegions()){ + if (regionInfo.getTable().equals(table1) && regionInfo.equals(rsn.getRegionInfo())) { + assertEquals(master.getAssignmentManager().getRegionStates() + .getRegionServerOfRegion(regionInfo), srcServer); + } + } + + // retry moveServersAndTables to newGroup and check if all regions on srcServer belongs to + // table2 + rsn.setState(RegionState.State.OPEN); + rsGroupAdmin.moveServersAndTables(Sets.newHashSet(srcServer.getAddress()), + Sets.newHashSet(table2), newGroup.getName()); + for(RegionInfo regionsInfo : master.getAssignmentManager().getRegionsOnServer(srcServer)){ + assertEquals(regionsInfo.getTable(), table2); + } } } -- 2.11.4.GIT