From e4e561b37fe5ff623bcf53ad096b30610f36dab2 Mon Sep 17 00:00:00 2001
From: Guanghao Zhang
Date: Sat, 13 Apr 2019 11:08:12 +0800
Subject: [PATCH] Revert "Merge pull request #133 from infraio/retry-backoff
 HBASE-22193 Add backoff when region failed open too many times"

This reverts commit 249ac58d4fe10e19e6643a37907a39b75294dbdc, reversing
changes made to f7867c4ffb0afd2161242949f397bbefe0af473e.
---
 .../hbase/master/assignment/AssignmentManager.java        | 11 ---------
 .../assignment/TransitRegionStateProcedure.java           | 26 +++++++++-------------
 2 files changed, 11 insertions(+), 26 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 5bdbb92769..1aea8f9dbc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -131,10 +131,6 @@ public class AssignmentManager {
       "hbase.assignment.maximum.attempts";
   private static final int DEFAULT_ASSIGN_MAX_ATTEMPTS = Integer.MAX_VALUE;
 
-  public static final String ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS =
-      "hbase.assignment.retry.immediately.maximum.attempts";
-  private static final int DEFAULT_ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS = 3;
-
   /** Region in Transition metrics threshold time */
   public static final String METRICS_RIT_STUCK_WARNING_THRESHOLD =
       "hbase.metrics.rit.stuck.warning.threshold";
@@ -155,7 +151,6 @@ public class AssignmentManager {
   private final int assignDispatchWaitQueueMaxSize;
   private final int assignDispatchWaitMillis;
   private final int assignMaxAttempts;
-  private final int assignRetryImmediatelyMaxAttempts;
 
   private final Object checkIfShouldMoveSystemRegionLock = new Object();
 
@@ -184,8 +179,6 @@ public class AssignmentManager {
 
     this.assignMaxAttempts = Math.max(1, conf.getInt(ASSIGN_MAX_ATTEMPTS,
         DEFAULT_ASSIGN_MAX_ATTEMPTS));
-    this.assignRetryImmediatelyMaxAttempts = conf.getInt(ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS,
-        DEFAULT_ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS);
 
     int ritChoreInterval = conf.getInt(RIT_CHORE_INTERVAL_MSEC_CONF_KEY,
         DEFAULT_RIT_CHORE_INTERVAL_MSEC);
@@ -315,10 +308,6 @@ public class AssignmentManager {
     return assignMaxAttempts;
   }
 
-  int getAssignRetryImmediatelyMaxAttempts() {
-    return assignRetryImmediatelyMaxAttempts;
-  }
-
   public RegionStates getRegionStates() {
     return regionStates;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
index 716db69a31..1be7a9af99 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
@@ -226,32 +226,20 @@ public class TransitRegionStateProcedure
       return Flow.HAS_MORE_STATE;
     }
 
-    int retries = env.getAssignmentManager().getRegionStates().addToFailedOpen(regionNode)
-      .incrementAndGetRetries();
-    int maxAttempts = env.getAssignmentManager().getAssignMaxAttempts();
-    LOG.info("Retry={} of max={}; {}; {}", retries, maxAttempts, this, regionNode.toShortString());
-
-    if (retries >= maxAttempts) {
+    if (incrementAndCheckMaxAttempts(env, regionNode)) {
       env.getAssignmentManager().regionFailedOpen(regionNode, true);
       setFailure(getClass().getSimpleName(), new RetriesExhaustedException(
         "Max attempts " + env.getAssignmentManager().getAssignMaxAttempts() + " exceeded"));
       regionNode.unsetProcedure(this);
       return Flow.NO_MORE_STATE;
     }
-
     env.getAssignmentManager().regionFailedOpen(regionNode, false);
     // we failed to assign the region, force a new plan
     forceNewPlan = true;
     regionNode.setRegionLocation(null);
     setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
-
-    if (retries > env.getAssignmentManager().getAssignRetryImmediatelyMaxAttempts()) {
-      // Throw exception to backoff and retry when failed open too many times
-      throw new HBaseIOException("Failed to open region");
-    } else {
-      // Here we do not throw exception because we want to the region to be online ASAP
-      return Flow.HAS_MORE_STATE;
-    }
+    // Here we do not throw exception because we want to the region to be online ASAP
+    return Flow.HAS_MORE_STATE;
   }
 
   private void closeRegion(MasterProcedureEnv env, RegionStateNode regionNode) throws IOException {
@@ -412,6 +400,14 @@
     this.remoteProc = null;
   }
 
+  private boolean incrementAndCheckMaxAttempts(MasterProcedureEnv env, RegionStateNode regionNode) {
+    int retries = env.getAssignmentManager().getRegionStates().addToFailedOpen(regionNode)
+      .incrementAndGetRetries();
+    int max = env.getAssignmentManager().getAssignMaxAttempts();
+    LOG.info("Retry={} of max={}; {}; {}", retries, max, this, regionNode.toShortString());
+    return retries >= max;
+  }
+
   @Override
   protected void rollbackState(MasterProcedureEnv env, RegionStateTransitionState state)
       throws IOException, InterruptedException {
--
2.11.4.GIT