/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;

import java.util.HashMap;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.MapFile;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
50 * Basic test for the HashTable M/R tool
52 @Category(LargeTests
.class)
53 public class TestHashTable
{
55 private static final Log LOG
= LogFactory
.getLog(TestHashTable
.class);
57 private static final HBaseTestingUtility TEST_UTIL
= new HBaseTestingUtility();
60 public TestName name
= new TestName();
63 public static void beforeClass() throws Exception
{
64 TEST_UTIL
.startMiniCluster(3);
68 public static void afterClass() throws Exception
{
69 TEST_UTIL
.shutdownMiniCluster();
73 public void testHashTable() throws Exception
{
74 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
75 final byte[] family
= Bytes
.toBytes("family");
76 final byte[] column1
= Bytes
.toBytes("c1");
77 final byte[] column2
= Bytes
.toBytes("c2");
78 final byte[] column3
= Bytes
.toBytes("c3");
84 byte[][] splitRows
= new byte[numRegions
-1][];
85 for (int i
= 1; i
< numRegions
; i
++) {
86 splitRows
[i
-1] = Bytes
.toBytes(numRows
* i
/ numRegions
);
89 long timestamp
= 1430764183454L;
90 // put rows into the first table
91 Table t1
= TEST_UTIL
.createTable(tableName
, family
, splitRows
);
92 for (int i
= 0; i
< numRows
; i
++) {
93 Put p
= new Put(Bytes
.toBytes(i
), timestamp
);
94 p
.addColumn(family
, column1
, column1
);
95 p
.addColumn(family
, column2
, column2
);
96 p
.addColumn(family
, column3
, column3
);
101 HashTable hashTable
= new HashTable(TEST_UTIL
.getConfiguration());
103 Path testDir
= TEST_UTIL
.getDataTestDirOnTestFS(tableName
.getNameAsString());
105 long batchSize
= 300;
106 int code
= hashTable
.run(new String
[] {
107 "--batchsize=" + batchSize
,
108 "--numhashfiles=" + numHashFiles
,
110 tableName
.getNameAsString(),
111 testDir
.toString()});
112 assertEquals("test job failed", 0, code
);
114 FileSystem fs
= TEST_UTIL
.getTestFileSystem();
116 HashTable
.TableHash tableHash
= HashTable
.TableHash
.read(fs
.getConf(), testDir
);
117 assertEquals(tableName
.getNameAsString(), tableHash
.tableName
);
118 assertEquals(batchSize
, tableHash
.batchSize
);
119 assertEquals(numHashFiles
, tableHash
.numHashFiles
);
120 assertEquals(numHashFiles
- 1, tableHash
.partitions
.size());
121 for (ImmutableBytesWritable bytes
: tableHash
.partitions
) {
122 LOG
.debug("partition: " + Bytes
.toInt(bytes
.get()));
125 ImmutableMap
<Integer
, ImmutableBytesWritable
> expectedHashes
126 = ImmutableMap
.<Integer
, ImmutableBytesWritable
>builder()
127 .put(-1, new ImmutableBytesWritable(Bytes
.fromHex("714cb10a9e3b5569852980edd8c6ca2f")))
128 .put(5, new ImmutableBytesWritable(Bytes
.fromHex("28d961d9252ce8f8d44a07b38d3e1d96")))
129 .put(10, new ImmutableBytesWritable(Bytes
.fromHex("f6bbc4a224d8fd929b783a92599eaffa")))
130 .put(15, new ImmutableBytesWritable(Bytes
.fromHex("522deb5d97f73a414ecc11457be46881")))
131 .put(20, new ImmutableBytesWritable(Bytes
.fromHex("b026f2611aaa46f7110116d807545352")))
132 .put(25, new ImmutableBytesWritable(Bytes
.fromHex("39ffc1a3094aa12a2e90ffd9cef2ce93")))
133 .put(30, new ImmutableBytesWritable(Bytes
.fromHex("f6b4d75727ce9a30ac29e4f08f601666")))
134 .put(35, new ImmutableBytesWritable(Bytes
.fromHex("422e2d2f1eb79a8f02171a705a42c090")))
135 .put(40, new ImmutableBytesWritable(Bytes
.fromHex("559ad61c900fffefea0a15abf8a97bc3")))
136 .put(45, new ImmutableBytesWritable(Bytes
.fromHex("23019084513eca41cee436b2a29611cb")))
137 .put(50, new ImmutableBytesWritable(Bytes
.fromHex("b40467d222ddb4949b142fe145ee9edc")))
138 .put(55, new ImmutableBytesWritable(Bytes
.fromHex("372bf89fcd8ca4b7ab3c1add9d07f7e4")))
139 .put(60, new ImmutableBytesWritable(Bytes
.fromHex("69ae0585e6255de27dce974e332b8f8b")))
140 .put(65, new ImmutableBytesWritable(Bytes
.fromHex("8029610044297aad0abdbecd485d8e59")))
141 .put(70, new ImmutableBytesWritable(Bytes
.fromHex("de5f784f7f78987b6e57ecfd81c8646f")))
142 .put(75, new ImmutableBytesWritable(Bytes
.fromHex("1cd757cc4e1715c8c3b1c24447a1ec56")))
143 .put(80, new ImmutableBytesWritable(Bytes
.fromHex("f9a53aacfeb6142b08066615e7038095")))
144 .put(85, new ImmutableBytesWritable(Bytes
.fromHex("89b872b7e639df32d3276b33928c0c91")))
145 .put(90, new ImmutableBytesWritable(Bytes
.fromHex("45eeac0646d46a474ea0484175faed38")))
146 .put(95, new ImmutableBytesWritable(Bytes
.fromHex("f57c447e32a08f4bf1abb2892839ac56")))
149 Map
<Integer
, ImmutableBytesWritable
> actualHashes
= new HashMap
<>();
150 Path dataDir
= new Path(testDir
, HashTable
.HASH_DATA_DIR
);
151 for (int i
= 0; i
< numHashFiles
; i
++) {
152 Path hashPath
= new Path(dataDir
, HashTable
.TableHash
.getDataFileName(i
));
154 MapFile
.Reader reader
= new MapFile
.Reader(hashPath
, fs
.getConf());
155 ImmutableBytesWritable key
= new ImmutableBytesWritable();
156 ImmutableBytesWritable hash
= new ImmutableBytesWritable();
157 while(reader
.next(key
, hash
)) {
158 String keyString
= Bytes
.toHex(key
.get(), key
.getOffset(), key
.getLength());
159 LOG
.debug("Key: " + (keyString
.isEmpty() ?
"-1" : Integer
.parseInt(keyString
, 16))
160 + " Hash: " + Bytes
.toHex(hash
.get(), hash
.getOffset(), hash
.getLength()));
163 if (key
.getLength() > 0) {
164 intKey
= Bytes
.toInt(key
.get(), key
.getOffset(), key
.getLength());
166 if (actualHashes
.containsKey(intKey
)) {
167 Assert
.fail("duplicate key in data files: " + intKey
);
169 actualHashes
.put(intKey
, new ImmutableBytesWritable(hash
.copyBytes()));
174 FileStatus
[] files
= fs
.listStatus(testDir
);
175 for (FileStatus file
: files
) {
176 LOG
.debug("Output file: " + file
.getPath());
179 files
= fs
.listStatus(dataDir
);
180 for (FileStatus file
: files
) {
181 LOG
.debug("Data file: " + file
.getPath());
184 if (!expectedHashes
.equals(actualHashes
)) {
185 LOG
.error("Diff: " + Maps
.difference(expectedHashes
, actualHashes
));
187 Assert
.assertEquals(expectedHashes
, actualHashes
);
189 TEST_UTIL
.deleteTable(tableName
);
190 TEST_UTIL
.cleanupDataTestDirOnTestFS();