3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 package org
.apache
.hadoop
.hbase
.util
;
22 import static java
.lang
.Integer
.rotateLeft
;
24 import java
.io
.FileInputStream
;
25 import java
.io
.IOException
;
27 import org
.apache
.yetus
.audience
.InterfaceAudience
;
28 import org
.apache
.yetus
.audience
.InterfaceStability
;
31 * Produces 32-bit hash for hash table lookup.
33 * <pre>lookup3.c, by Bob Jenkins, May 2006, Public Domain.
35 * You can use this free for any purpose. It's in the public domain.
39 * @see <a href="http://burtleburtle.net/bob/c/lookup3.c">lookup3.c</a>
40 * @see <a href="http://www.ddj.com/184410284">Hash Functions (and how this
41 * function compares to others such as CRC, MD?, etc</a>
42 * @see <a href="http://burtleburtle.net/bob/hash/doobs.html">Has update on the
43 * Dr. Dobbs Article</a>
45 @InterfaceAudience.Private
46 @InterfaceStability.Stable
47 public class JenkinsHash
extends Hash
{
48 private static final int BYTE_MASK
= 0xff;
50 private static JenkinsHash _instance
= new JenkinsHash();
52 public static Hash
getInstance() {
57 * Compute the hash of the specified file
58 * @param args name of file to compute hash of.
59 * @throws IOException e
61 public static void main(String
[] args
) throws IOException
{
62 if (args
.length
!= 1) {
63 System
.err
.println("Usage: JenkinsHash filename");
66 FileInputStream in
= new FileInputStream(args
[0]);
67 byte[] bytes
= new byte[512];
69 JenkinsHash hash
= new JenkinsHash();
71 for (int length
= in
.read(bytes
); length
> 0; length
= in
.read(bytes
)) {
72 value
= hash
.hash(new ByteArrayHashKey(bytes
, 0, length
), value
);
77 System
.out
.println(Math
.abs(value
));
81 * taken from hashlittle() -- hash a variable-length key into a 32-bit value
83 * @param hashKey the key to extract the bytes for hash algo
84 * @param initval can be any integer value
85 * @return a 32-bit value. Every bit of the key affects every bit of the
86 * return value. Two keys differing by one or two bits will have totally
87 * different hash values.
89 * <p>The best hash table sizes are powers of 2. There is no need to do mod
90 * a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask.
91 * For example, if you need only 10 bits, do
92 * <code>h = (h & hashmask(10));</code>
93 * In which case, the hash table should have hashsize(10) elements.
95 * <p>If you are hashing n strings byte[][] k, do it like this:
96 * for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h);
98 * <p>By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
99 * code any way you wish, private, educational, or commercial. It's free.
101 * <p>Use for hash table lookup, or anything where one collision in 2^^32 is
102 * acceptable. Do NOT use for cryptographic purposes.
104 @SuppressWarnings({"fallthrough", "MissingDefault"})
106 public <T
> int hash(HashKey
<T
> hashKey
, int initval
) {
107 int length
= hashKey
.length();
109 a
= b
= c
= 0xdeadbeef + length
+ initval
;
111 for (; length
> 12; offset
+= 12, length
-= 12) {
112 a
+= (hashKey
.get(offset
) & BYTE_MASK
);
113 a
+= ((hashKey
.get(offset
+ 1) & BYTE_MASK
) << 8);
114 a
+= ((hashKey
.get(offset
+ 2) & BYTE_MASK
) << 16);
115 a
+= ((hashKey
.get(offset
+ 3) & BYTE_MASK
) << 24);
116 b
+= (hashKey
.get(offset
+ 4) & BYTE_MASK
);
117 b
+= ((hashKey
.get(offset
+ 5) & BYTE_MASK
) << 8);
118 b
+= ((hashKey
.get(offset
+ 6) & BYTE_MASK
) << 16);
119 b
+= ((hashKey
.get(offset
+ 7) & BYTE_MASK
) << 24);
120 c
+= (hashKey
.get(offset
+ 8) & BYTE_MASK
);
121 c
+= ((hashKey
.get(offset
+ 9) & BYTE_MASK
) << 8);
122 c
+= ((hashKey
.get(offset
+ 10) & BYTE_MASK
) << 16);
123 c
+= ((hashKey
.get(offset
+ 11) & BYTE_MASK
) << 24);
126 * mix -- mix 3 32-bit values reversibly.
127 * This is reversible, so any information in (a,b,c) before mix() is
128 * still in (a,b,c) after mix().
130 * If four pairs of (a,b,c) inputs are run through mix(), or through
131 * mix() in reverse, there are at least 32 bits of the output that
132 * are sometimes the same for one pair and different for another pair.
134 * This was tested for:
135 * - pairs that differed by one bit, by two bits, in any combination
136 * of top bits of (a,b,c), or in any combination of bottom bits of
138 * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
139 * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
140 * is commonly produced by subtraction) look like a single 1-bit
142 * - the base values were pseudorandom, all zero but one bit set, or
143 * all zero plus a counter that starts at zero.
145 * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
150 * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for
151 * "differ" defined as + with a one-bit base and a two-bit delta. I
152 * used http://burtleburtle.net/bob/hash/avalanche.html to choose
153 * the operations, constants, and arrangements of the variables.
155 * This does not achieve avalanche. There are input bits of (a,b,c)
156 * that fail to affect some output bits of (a,b,c), especially of a.
157 * The most thoroughly mixed value is c, but it doesn't really even
158 * achieve avalanche in c.
160 * This allows some parallelism. Read-after-writes are good at doubling
161 * the number of bits affected, so the goal of mixing pulls in the
162 * opposite direction as the goal of parallelism. I did what I could.
163 * Rotates seem to cost as much as shifts on every machine I could lay
164 * my hands on, and rotates are much kinder to the top and bottom bits,
167 * #define mix(a,b,c) \
169 * a -= c; a ^= rot(c, 4); c += b; \
170 * b -= a; b ^= rot(a, 6); a += c; \
171 * c -= b; c ^= rot(b, 8); b += a; \
172 * a -= c; a ^= rot(c,16); c += b; \
173 * b -= a; b ^= rot(a,19); a += c; \
174 * c -= b; c ^= rot(b, 4); b += a; \
179 a
-= c
; a ^
= rotateLeft(c
, 4); c
+= b
;
180 b
-= a
; b ^
= rotateLeft(a
, 6); a
+= c
;
181 c
-= b
; c ^
= rotateLeft(b
, 8); b
+= a
;
182 a
-= c
; a ^
= rotateLeft(c
, 16); c
+= b
;
183 b
-= a
; b ^
= rotateLeft(a
, 19); a
+= c
;
184 c
-= b
; c ^
= rotateLeft(b
, 4); b
+= a
;
187 //-------------------------------- last block: affect all 32 bits of (c)
188 switch (length
) { // all the case statements fall through
190 c
+= ((hashKey
.get(offset
+ 11) & BYTE_MASK
) << 24);
192 c
+= ((hashKey
.get(offset
+ 10) & BYTE_MASK
) << 16);
194 c
+= ((hashKey
.get(offset
+ 9) & BYTE_MASK
) << 8);
196 c
+= (hashKey
.get(offset
+ 8) & BYTE_MASK
);
198 b
+= ((hashKey
.get(offset
+ 7) & BYTE_MASK
) << 24);
200 b
+= ((hashKey
.get(offset
+ 6) & BYTE_MASK
) << 16);
202 b
+= ((hashKey
.get(offset
+ 5) & BYTE_MASK
) << 8);
204 b
+= (hashKey
.get(offset
+ 4) & BYTE_MASK
);
206 a
+= ((hashKey
.get(offset
+ 3) & BYTE_MASK
) << 24);
208 a
+= ((hashKey
.get(offset
+ 2) & BYTE_MASK
) << 16);
210 a
+= ((hashKey
.get(offset
+ 1) & BYTE_MASK
) << 8);
212 //noinspection PointlessArithmeticExpression
213 a
+= (hashKey
.get(offset
+ 0) & BYTE_MASK
);
219 * final -- final mixing of 3 32-bit values (a,b,c) into c
221 * Pairs of (a,b,c) values differing in only a few bits will usually
222 * produce values of c that look totally different. This was tested for
223 * - pairs that differed by one bit, by two bits, in any combination
224 * of top bits of (a,b,c), or in any combination of bottom bits of
227 * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
228 * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
229 * is commonly produced by subtraction) look like a single 1-bit
232 * - the base values were pseudorandom, all zero but one bit set, or
233 * all zero plus a counter that starts at zero.
235 * These constants passed:
236 * 14 11 25 16 4 14 24
237 * 12 14 25 16 4 14 24
238 * and these came close:
243 * #define final(a,b,c) \
245 * c ^= b; c -= rot(b,14); \
246 * a ^= c; a -= rot(c,11); \
247 * b ^= a; b -= rot(a,25); \
248 * c ^= b; c -= rot(b,16); \
249 * a ^= c; a -= rot(c,4); \
250 * b ^= a; b -= rot(a,14); \
251 * c ^= b; c -= rot(b,24); \
255 c ^
= b
; c
-= rotateLeft(b
, 14);
256 a ^
= c
; a
-= rotateLeft(c
, 11);
257 b ^
= a
; b
-= rotateLeft(a
, 25);
258 c ^
= b
; c
-= rotateLeft(b
, 16);
259 a ^
= c
; a
-= rotateLeft(c
, 4);
260 b ^
= a
; b
-= rotateLeft(a
, 14);
261 c ^
= b
; c
-= rotateLeft(b
, 24);