1 /*-----------------------------------------------------------------------------
2 * MurmurHash3 was written by Austin Appleby, and is placed in the public
5 * This implementation was written by Shane Day, and is also public domain.
6 * Translated to D by Ketmar // Invisible Vector
8 * This is a D implementation of MurmurHash3_x86_32 (Murmur3A) with support
9 * for progressive processing.
11 module iv
.hash
.murhash
/*is aliced*/;
14 /*-----------------------------------------------------------------------------
16 If you want to understand the MurmurHash algorithm you would be much better
17 off reading the original source. Just point your browser at:
18 http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
21 What this version provides?
23 Progressive data feeding. Useful when the entire payload to be hashed
24 does not fit in memory or when the data is streamed through the application.
25 Also useful when hashing a number of strings with a common prefix. A partial
26 hash of a prefix string can be generated and reused for each suffix string.
31 We can only process entire 32 bit chunks of input, except for the very end
32 that may be shorter. So along with the partial hash we need to give back to
33 the caller a carry containing up to 3 bytes that we were unable to process.
34 This carry also needs to record the number of bytes the carry holds. I use
35 the low 2 bits as a count (0..3) and the carry bytes are shifted into the
36 high byte in stream order.
38 To handle endianess I simply use a macro that reads a uint32_t and define
39 that macro to be a direct read on little endian machines, a read and swap
40 on big endian machines, or a byte-by-byte read if the endianess is unknown.
42 -----------------------------------------------------------------------------*/
44 public struct MurHash
{
46 enum has64bit
= false;
50 enum C1
= 0xcc9e2d51u
;
51 enum C2
= 0x1b873593u
;
54 uint seed
; // initial seed value; MUST BE FIRST
55 uint hash
; // current value
56 uint accum
; // we operate 32-bit chunks; low 2 bits of accum used as counter
57 uint totallen
; // to match the original Murmur3A
60 nothrow @trusted @nogc:
61 /// construct state with seed
62 this (uint aseed
) pure { hash
= seed
= aseed
; }
65 void reset () pure { accum
= totallen
= 0; hash
= seed
; }
68 void reset (uint aseed
) pure { accum
= totallen
= 0; hash
= seed
= aseed
; }
70 /// process data block
71 void put(T
) (scope const(T
)[] data
...) if (T
.sizeof
== 1) {
72 if (data
.length
== 0) return; // nothing to do
73 auto bytes
= cast(const(ubyte)*)data
.ptr
;
74 auto len
= data
.length
;
75 static if (len
.sizeof
> uint.sizeof
) {
76 if (len
> uint.max
) assert(0, "MurHash: too much data");
78 if (uint.max
-totallen
< len
) assert(0, "MurHash: too much data"); // overflow
82 // extract carry count from low 2 bits of accum value
84 // consume any carry bytes
88 acc
= (acc
>>8)|
(*bytes
++<<24);
93 acc
= (acc
<<15)|
(acc
>>(32-15));
96 hh
= (hh
<<13)|
(hh
>>(32-13));
101 // process 32-bit chunks
103 foreach (immutable _
; 0..len
/4) {
104 version(LittleEndian
) {
106 k1
= (bytes
[0])|
(bytes
[1]<<8)|
(bytes
[2]<<16)|
(bytes
[3]<<24);
108 k1
= *cast(const(uint)*)bytes
;
112 k1
= (bytes
[3])|
(bytes
[2]<<8)|
(bytes
[1]<<16)|
(bytes
[0]<<24);
114 import core
.bitop
: bswap;
115 k1
= bswap(*cast(const(uint)*)bytes
);
120 k1
= (k1
<<15)|
(k1
>>(32-15));
123 hh
= (hh
<<13)|
(hh
>>(32-13));
124 hh
= hh
*5+0xe6546b64;
126 // advance over whole 32-bit chunks, possibly leaving 1..3 bytes
128 // append any remaining bytes into carry
130 acc
= (acc
>>8)|
(*bytes
++<<24);
134 acc
= (acc
<<15)|
(acc
>>(32-15));
137 hh
= (hh
<<13)|
(hh
>>(32-13));
138 hh
= hh
*5+0xe6546b64;
141 // store accum counter
148 /// finalize a hash (i.e. return current result).
149 /// note that you can continue putting data, as this is not destructive
150 @property uint result32 () const pure {
155 uint k1
= acc
>>(4-n
)*8;
157 k1
= (k1
<<15)|
(k1
>>(32-15));
171 uint finish32 () pure { auto res
= result32
; reset(); return res
; } /// resets state
176 * 32-bit implementation of Murmur3
183 uint murHash32(T) (const(T)[] buf, uint seed=0) nothrow @trusted @nogc if (T.sizeof == 1) {
184 auto hh = MurHash(seed);
192 * MurmurHash3 was written by Austin Appleby, and is placed in the public domain.
201 uint murHash32(T
) (const(T
)[] data
, uint seed
=0) pure nothrow @trusted @nogc if (T
.sizeof
== 1) {
202 enum C1
= 0xcc9e2d51u
;
203 enum C2
= 0x1b873593u
;
205 uint hash
= seed
; // current value
208 // process data blocks
210 auto bytes
= cast(const(ubyte)*)data
.ptr
;
211 auto len
= data
.length
;
212 // process 32-bit chunks
213 foreach (immutable _
; 0..len
/4) {
214 version(LittleEndian
) {
216 k1
= (bytes
[0])|
(bytes
[1]<<8)|
(bytes
[2]<<16)|
(bytes
[3]<<24);
218 k1
= *cast(const(uint)*)bytes
;
222 k1
= (bytes
[3])|
(bytes
[2]<<8)|
(bytes
[1]<<16)|
(bytes
[0]<<24);
224 import core
.bitop
: bswap;
225 k1
= bswap(*cast(const(uint)*)bytes
);
230 k1
= (k1
<<15)|
(k1
>>(32-15));
233 hash
= (hash
<<13)|
(hash
>>(32-13));
234 hash
= hash
*5+0xe6546b64;
236 // advance over whole 32-bit chunks, possibly leaving 1..3 bytes
237 if ((len
&= 0x03) != 0) {
238 immutable ubyte n
= cast(ubyte)len
;
239 // append any remaining bytes into carry
241 while (len
--) accum
= (accum
>>8)|
(*bytes
++<<24);
243 k1
= accum
>>((4-n
)*8);
245 k1
= (k1
<<15)|
(k1
>>(32-15));
249 hash ^
= cast(uint)data
.length
;
264 * MurmurHash3 was written by Austin Appleby, and is placed in the public domain.
273 uint murHash32(T
) (const(T
)[] data
, uint seed
=0) pure nothrow @trusted @nogc if (T
.sizeof
> 1) {
274 return murHash32((cast(const(ubyte)*)buf
.ptr
)[0..buf
.length
*T
.sizeof
]);
278 version(iv_hash_unittest
) unittest {
279 // wow, we can do this in compile time!
280 static assert(murHash32("Alice & Miriel") == 0x295db5e7u
);
282 static uint xmurhash32(T
) (const(T
)[] buf
, uint seed
=0) nothrow @trusted @nogc if (T
.sizeof
== 1) {
283 auto hh
= MurHash(seed
);
287 static assert(xmurhash32("Alice & Miriel") == 0x295db5e7u
);
292 writefln("0x%08xU", murHash32("Alice & Miriel"));
295 mixin(import("test.d"));
296 doTest
!(32, "MurHash")("Alice & Miriel", 0x295db5e7u
);
297 assert(murHash32("Alice & Miriel") == 0x295db5e7u
);
298 assert(xmurhash32("Alice & Miriel") == 0x295db5e7u
);