3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
19 package org
.apache
.hadoop
.hbase
.mapreduce
;
21 import java
.io
.IOException
;
22 import org
.apache
.yetus
.audience
.InterfaceAudience
;
23 import org
.slf4j
.Logger
;
24 import org
.slf4j
.LoggerFactory
;
25 import org
.apache
.hadoop
.conf
.Configurable
;
26 import org
.apache
.hadoop
.conf
.Configuration
;
27 import org
.apache
.hadoop
.hbase
.HBaseConfiguration
;
28 import org
.apache
.hadoop
.hbase
.TableName
;
29 import org
.apache
.hadoop
.hbase
.client
.Connection
;
30 import org
.apache
.hadoop
.hbase
.client
.ConnectionFactory
;
31 import org
.apache
.hadoop
.hbase
.client
.RegionLocator
;
32 import org
.apache
.hadoop
.hbase
.io
.ImmutableBytesWritable
;
33 import org
.apache
.hadoop
.hbase
.mapred
.TableOutputFormat
;
34 import org
.apache
.hadoop
.hbase
.util
.Bytes
;
35 import org
.apache
.hadoop
.mapreduce
.Partitioner
;
38 * This is used to partition the output keys into groups of keys.
39 * Keys are grouped according to the regions that currently exist
40 * so that each reducer fills a single region so load is distributed.
42 * <p>This class is not suitable as partitioner creating hfiles
43 * for incremental bulk loads as region spread will likely change between time of
44 * hfile creation and load time. See {@link org.apache.hadoop.hbase.tool.BulkLoadHFiles}
45 * and <a href="http://hbase.apache.org/book.html#arch.bulk.load">Bulk Load</a>.</p>
47 * @param <KEY> The type of the key.
48 * @param <VALUE> The type of the value.
50 @InterfaceAudience.Public
51 public class HRegionPartitioner
<KEY
, VALUE
>
52 extends Partitioner
<ImmutableBytesWritable
, VALUE
>
53 implements Configurable
{
55 private static final Logger LOG
= LoggerFactory
.getLogger(HRegionPartitioner
.class);
56 private Configuration conf
= null;
57 // Connection and locator are not cleaned up; they just die when partitioner is done.
58 private Connection connection
;
59 private RegionLocator locator
;
60 private byte[][] startKeys
;
63 * Gets the partition number for a given key (hence record) given the total
64 * number of partitions i.e. number of reduce-tasks for the job.
66 * <p>Typically a hash function on a all or a subset of the key.</p>
68 * @param key The key to be partitioned.
69 * @param value The entry value.
70 * @param numPartitions The total number of partitions.
71 * @return The partition number for the <code>key</code>.
72 * @see org.apache.hadoop.mapreduce.Partitioner#getPartition(
73 * java.lang.Object, java.lang.Object, int)
76 public int getPartition(ImmutableBytesWritable key
,
77 VALUE value
, int numPartitions
) {
79 // Only one region return 0
80 if (this.startKeys
.length
== 1){
84 // Not sure if this is cached after a split so we could have problems
85 // here if a region splits while mapping
86 region
= this.locator
.getRegionLocation(key
.get()).getRegion().getStartKey();
87 } catch (IOException e
) {
88 LOG
.error(e
.toString(), e
);
90 for (int i
= 0; i
< this.startKeys
.length
; i
++){
91 if (Bytes
.compareTo(region
, this.startKeys
[i
]) == 0 ){
92 if (i
>= numPartitions
-1){
93 // cover if we have less reduces then regions.
94 return (Integer
.toString(i
).hashCode()
95 & Integer
.MAX_VALUE
) % numPartitions
;
100 // if above fails to find start key that match we need to return something
105 * Returns the current configuration.
107 * @return The current configuration.
108 * @see org.apache.hadoop.conf.Configurable#getConf()
111 public Configuration
getConf() {
116 * Sets the configuration. This is used to determine the start keys for the
119 * @param configuration The configuration to set.
120 * @see org.apache.hadoop.conf.Configurable#setConf(
121 * org.apache.hadoop.conf.Configuration)
124 public void setConf(Configuration configuration
) {
125 this.conf
= HBaseConfiguration
.create(configuration
);
127 this.connection
= ConnectionFactory
.createConnection(HBaseConfiguration
.create(conf
));
128 TableName tableName
= TableName
.valueOf(conf
.get(TableOutputFormat
.OUTPUT_TABLE
));
129 this.locator
= this.connection
.getRegionLocator(tableName
);
130 } catch (IOException e
) {
131 LOG
.error(e
.toString(), e
);
134 this.startKeys
= this.locator
.getStartKeys();
135 } catch (IOException e
) {
136 LOG
.error(e
.toString(), e
);