From 7d3a89ce8e07d9fd1c31b4bd2324b71dd10ecef5 Mon Sep 17 00:00:00 2001 From: Harsh J Date: Fri, 4 Mar 2016 15:59:48 +0530 Subject: [PATCH] HBASE-15396 Enhance mapreduce.TableSplit to add encoded region name Signed-off-by: Sean Busbey --- .../hbase/mapreduce/MultiTableInputFormatBase.java | 4 +- .../hbase/mapreduce/TableInputFormatBase.java | 10 ++-- .../apache/hadoop/hbase/mapreduce/TableSplit.java | 59 ++++++++++++++++++---- .../hadoop/hbase/mapreduce/TestTableSplit.java | 22 +++++++- 4 files changed, 77 insertions(+), 18 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java index 6f0075a2e2..4931c3fd21 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java @@ -223,11 +223,13 @@ public abstract class MultiTableInputFormatBase extends keys.getFirst()[i], false); String regionHostname = hregionLocation.getHostname(); HRegionInfo regionInfo = hregionLocation.getRegionInfo(); + String encodedRegionName = regionInfo.getEncodedName(); long regionSize = sizeCalculator.getRegionSize( regionInfo.getRegionName()); TableSplit split = new TableSplit(table.getName(), - scan, splitStart, splitStop, regionHostname, regionSize); + scan, splitStart, splitStop, regionHostname, + encodedRegionName, regionSize); splits.add(split); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java index 65b4efc123..2cde4b99be 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java @@ -300,9 +300,10 @@ extends InputFormat { keys.getSecond()[i] : stopRow; byte[] regionName = location.getRegionInfo().getRegionName(); + String encodedRegionName = location.getRegionInfo().getEncodedName(); long regionSize = sizeCalculator.getRegionSize(regionName); TableSplit split = new TableSplit(tableName, scan, - splitStart, splitStop, regionLocation, regionSize); + splitStart, splitStop, regionLocation, encodedRegionName, regionSize); splits.add(split); if (LOG.isDebugEnabled()) { LOG.debug("getSplits: split -> " + i + " -> " + split); @@ -382,6 +383,7 @@ extends InputFormat { TableSplit ts = (TableSplit)list.get(count); TableName tableName = ts.getTable(); String regionLocation = ts.getRegionLocation(); + String encodedRegionName = ts.getEncodedRegionName(); long regionSize = ts.getLength(); if (regionSize >= dataSkewThreshold) { // if the current region size is large than the data skew threshold, @@ -390,9 +392,9 @@ extends InputFormat { //Set the size of child TableSplit as 1/2 of the region size. The exact size of the // MapReduce input splits is not far off. TableSplit t1 = new TableSplit(tableName, scan, ts.getStartRow(), splitKey, regionLocation, - regionSize / 2); + encodedRegionName, regionSize / 2); TableSplit t2 = new TableSplit(tableName, scan, splitKey, ts.getEndRow(), regionLocation, - regionSize - regionSize / 2); + encodedRegionName, regionSize - regionSize / 2); resultList.add(t1); resultList.add(t2); count++; @@ -419,7 +421,7 @@ extends InputFormat { } } TableSplit t = new TableSplit(tableName, scan, splitStartKey, splitEndKey, - regionLocation, totalSize); + regionLocation, encodedRegionName, totalSize); resultList.add(t); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java index 850db81112..1795909bba 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSplit.java @@ -52,7 +52,9 @@ implements Writable, Comparable { enum Version { UNVERSIONED(0), // Initial number we put on TableSplit when we introduced versioning. - INITIAL(-1); + INITIAL(-1), + // Added an encoded region name field for easier identification of split -> region + WITH_ENCODED_REGION_NAME(-2); final int code; static final Version[] byCode; @@ -78,11 +80,12 @@ implements Writable, Comparable { } } - private static final Version VERSION = Version.INITIAL; + private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME; private TableName tableName; private byte [] startRow; private byte [] endRow; private String regionLocation; + private String encodedRegionName = ""; private String scan = ""; // stores the serialized form of the Scan private long length; // Contains estimation of region size in bytes @@ -95,6 +98,7 @@ implements Writable, Comparable { /** * Creates a new instance while assigning all variables. * Length of region is set to 0 + * Encoded name of the region is set to blank * * @param tableName The name of the current table. * @param scan The scan associated with this split. @@ -109,6 +113,7 @@ implements Writable, Comparable { /** * Creates a new instance while assigning all variables. + * Encoded name of region is set to blank * * @param tableName The name of the current table. * @param scan The scan associated with this split. @@ -118,6 +123,21 @@ implements Writable, Comparable { */ public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow, final String location, long length) { + this(tableName, scan, startRow, endRow, location, "", length); + } + + /** + * Creates a new instance while assigning all variables. + * + * @param tableName The name of the current table. + * @param scan The scan associated with this split. + * @param startRow The start row of the split. + * @param endRow The end row of the split. + * @param encodedRegionName The region ID. + * @param location The location of the region. + */ + public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow, + final String location, final String encodedRegionName, long length) { this.tableName = tableName; try { this.scan = @@ -128,11 +148,13 @@ implements Writable, Comparable { this.startRow = startRow; this.endRow = endRow; this.regionLocation = location; + this.encodedRegionName = encodedRegionName; this.length = length; } /** * Creates a new instance without a scanner. + * Length of region is set to 0 * * @param tableName The name of the current table. * @param startRow The start row of the split. @@ -228,6 +250,15 @@ implements Writable, Comparable { } /** + * Returns the region's encoded name. + * + * @return The region's encoded name. + */ + public String getEncodedRegionName() { + return encodedRegionName; + } + + /** * Returns the length of the split. * * @return The length of the split. @@ -271,6 +302,9 @@ implements Writable, Comparable { scan = Bytes.toString(Bytes.readByteArray(in)); } length = WritableUtils.readVLong(in); + if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) { + encodedRegionName = Bytes.toString(Bytes.readByteArray(in)); + } } /** @@ -288,6 +322,7 @@ implements Writable, Comparable { Bytes.writeByteArray(out, Bytes.toBytes(regionLocation)); Bytes.writeByteArray(out, Bytes.toBytes(scan)); WritableUtils.writeVLong(out, length); + Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName)); } /** @@ -316,6 +351,7 @@ implements Writable, Comparable { sb.append(", start row: ").append(Bytes.toStringBinary(startRow)); sb.append(", end row: ").append(Bytes.toStringBinary(endRow)); sb.append(", region location: ").append(regionLocation); + sb.append(", encoded region name: ").append(encodedRegionName); sb.append(")"); return sb.toString(); } @@ -348,13 +384,14 @@ implements Writable, Comparable { regionLocation.equals(((TableSplit)o).regionLocation); } - @Override - public int hashCode() { - int result = tableName != null ? tableName.hashCode() : 0; - result = 31 * result + (scan != null ? scan.hashCode() : 0); - result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0); - result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0); - result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0); - return result; - } + @Override + public int hashCode() { + int result = tableName != null ? tableName.hashCode() : 0; + result = 31 * result + (scan != null ? scan.hashCode() : 0); + result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0); + result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0); + result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0); + result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0); + return result; + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java index 59f787f18f..47a5ca7244 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSplit.java @@ -93,13 +93,31 @@ public class TestTableSplit { "location"); String str = "HBase table split(table name: table, scan: , start row: row-start, " - + "end row: row-end, region location: location)"; + + "end row: row-end, region location: location, " + + "encoded region name: )"; + Assert.assertEquals(str, split.toString()); + + split = + new TableSplit(TableName.valueOf("table"), null, "row-start".getBytes(), + "row-end".getBytes(), "location", "encoded-region-name", 1000L); + str = + "HBase table split(table name: table, scan: , start row: row-start, " + + "end row: row-end, region location: location, " + + "encoded region name: encoded-region-name)"; Assert.assertEquals(str, split.toString()); split = new TableSplit((TableName) null, null, null, null); str = "HBase table split(table name: null, scan: , start row: null, " - + "end row: null, region location: null)"; + + "end row: null, region location: null, " + + "encoded region name: )"; + Assert.assertEquals(str, split.toString()); + + split = new TableSplit((TableName) null, null, null, null, null, null, 1000L); + str = + "HBase table split(table name: null, scan: , start row: null, " + + "end row: null, region location: null, " + + "encoded region name: null)"; Assert.assertEquals(str, split.toString()); } } -- 2.11.4.GIT