1 import iv
.hash
.fasthash
;
3 import iv
.hash
.murhash
;
8 // /tmp/bigtext/dump.sql
12 // ////////////////////////////////////////////////////////////////////////// //
16 foreach (string s
; VFile("_00/zwords.txt").byLineCopy
) words
~= s
;
20 // ////////////////////////////////////////////////////////////////////////// //
21 uint sfHash (const(char)[] str) {
22 return SFhashOf(str.ptr
, str.length
);
26 usize
SFhashOf( const (void)* buf
, usize len
, usize seed
= 0 ) @trusted pure nothrow @nogc
29 * This is Paul Hsieh's SuperFastHash algorithm, described here:
30 * http://www.azillionmonkeys.com/qed/hash.html
31 * It is protected by the following open source license:
32 * http://www.azillionmonkeys.com/qed/weblicense.html
34 static uint get16bits( const (ubyte)* x
) pure nothrow
36 // CTFE doesn't support casting ubyte* -> ushort*, so revert to
37 // per-byte access when in CTFE.
38 version( HasUnalignedOps
)
41 return *cast(ushort*) x
;
44 return ((cast(uint) x
[1]) << 8) + (cast(uint) x
[0]);
47 // NOTE: SuperFastHash normally starts with a zero hash value. The seed
48 // value was incorporated to allow chaining.
49 auto data
= cast(const (ubyte)*) buf
;
53 if( len
<= 0 || data
is null )
59 for( ; len
> 0; len
-- )
61 hash
+= get16bits( data
);
62 auto tmp
= (get16bits( data
+ 2 ) << 11) ^ hash
;
63 hash
= (hash
<< 16) ^ tmp
;
64 data
+= 2 * ushort.sizeof
;
70 case 3: hash
+= get16bits( data
);
72 hash ^
= data
[ushort.sizeof
] << 18;
75 case 2: hash
+= get16bits( data
);
79 case 1: hash
+= *data
;
87 /* Force "avalanching" of final 127 bits */
99 // ////////////////////////////////////////////////////////////////////////// //
100 uint murHashX (const(char)[] str, usize seed
=0) {
101 return MurHashOf(str, seed
);
105 usize
MurHashOf (const(char)[] data
, usize seed
=0) pure nothrow @trusted @nogc {
106 enum C1
= 0xcc9e2d51u
;
107 enum C2
= 0x1b873593u
;
109 uint hash
= seed
; // current value
110 uint accum
; // we operate 32-bit chunks; low 2 bits of accum used as counter
114 // process data blocks
116 auto bytes
= cast(const(ubyte)*)data
.ptr
;
117 auto len
= data
.length
;
118 // process 32-bit chunks
119 foreach (immutable _
; 0..len
/4) {
120 version(LittleEndian
) {
122 k1
= (bytes
[0])|
(bytes
[1]<<8)|
(bytes
[2]<<16)|
(bytes
[3]<<24);
124 k1
= *cast(const(uint)*)bytes
;
128 k1
= (bytes
[3])|
(bytes
[2]<<8)|
(bytes
[1]<<16)|
(bytes
[0]<<24);
130 import core
.bitop
: bswap;
131 k1
= bswap(*cast(const(uint)*)bytes
);
136 k1
= (k1
<<15)|
(k1
>>(32-15));
139 hash
= (hash
<<13)|
(hash
>>(32-13));
140 hash
= hash
*5+0xe6546b64;
142 // advance over whole 32-bit chunks, possibly leaving 1..3 bytes
145 // append any remaining bytes into carry
146 while (len
--) accum
= (accum
>>8)|
(*bytes
++<<24);
153 k1
= (k1
<<15)|
(k1
>>(32-15));
157 hash ^
= cast(uint)data
.length
;
169 * MurmurHash3 was written by Austin Appleby, and is placed in the public domain.
178 uint murHash32 (const(char)[] data
, uint seed
=0) pure nothrow @trusted @nogc {
179 enum C1
= 0xcc9e2d51u
;
180 enum C2
= 0x1b873593u
;
182 uint hash
= seed
; // current value
185 // process data blocks
187 auto bytes
= cast(const(ubyte)*)data
.ptr
;
188 auto len
= data
.length
;
189 // process 32-bit chunks
190 foreach (immutable _
; 0..len
/4) {
191 version(LittleEndian
) {
193 k1
= (bytes
[0])|
(bytes
[1]<<8)|
(bytes
[2]<<16)|
(bytes
[3]<<24);
195 k1
= *cast(const(uint)*)bytes
;
199 k1
= (bytes
[3])|
(bytes
[2]<<8)|
(bytes
[1]<<16)|
(bytes
[0]<<24);
201 import core
.bitop
: bswap;
202 k1
= bswap(*cast(const(uint)*)bytes
);
207 k1
= (k1
<<15)|
(k1
>>(32-15));
210 hash
= (hash
<<13)|
(hash
>>(32-13));
211 hash
= hash
*5+0xe6546b64;
213 // advance over whole 32-bit chunks, possibly leaving 1..3 bytes
214 if ((len
&= 0x03) != 0) {
215 immutable ubyte n
= cast(ubyte)len
;
216 // append any remaining bytes into carry
218 while (len
--) accum
= (accum
>>8)|
(*bytes
++<<24);
220 k1
= accum
>>((4-n
)*8);
222 k1
= (k1
<<15)|
(k1
>>(32-15));
226 hash ^
= cast(uint)data
.length
;
241 usize
bytesHash3(const(char)[] buf
, usize seed
=0) @system nothrow @nogc {
242 static uint rotl32(uint n
)(in uint x
) pure nothrow @safe @nogc
244 return (x
<< n
) |
(x
>> (32 - n
));
247 //-----------------------------------------------------------------------------
248 // Block read - if your platform needs to do endian-swapping or can only
249 // handle aligned reads, do the conversion here
250 static uint get32bits(const (ubyte)* x
) pure nothrow @nogc
252 //Compiler can optimize this code to simple *cast(uint*)x if it possible.
253 version(HasUnalignedOps
)
256 return *cast(uint*)x
; //BUG: Can't be inlined by DMD
260 return ((cast(uint) x
[0]) << 24) |
((cast(uint) x
[1]) << 16) |
((cast(uint) x
[2]) << 8) |
(cast(uint) x
[3]);
264 return ((cast(uint) x
[3]) << 24) |
((cast(uint) x
[2]) << 16) |
((cast(uint) x
[1]) << 8) |
(cast(uint) x
[0]);
268 //-----------------------------------------------------------------------------
269 // Finalization mix - force all bits of a hash block to avalanche
270 static uint fmix32(uint h
) pure nothrow @safe @nogc
281 auto len
= buf
.length
;
282 auto data
= cast(const(ubyte)*)buf
.ptr
;
283 auto nblocks
= len
/ 4;
285 uint h1
= cast(uint)seed
;
287 enum uint c1
= 0xcc9e2d51;
288 enum uint c2
= 0x1b873593;
289 enum uint c3
= 0xe6546b64;
293 auto end_data
= data
+nblocks
*uint.sizeof
;
294 for(; data
!=end_data
; data
+= uint.sizeof
)
296 uint k1
= get32bits(data
);
312 case 3: k1 ^
= data
[2] << 16; goto case;
313 case 2: k1 ^
= data
[1] << 8; goto case;
314 case 1: k1 ^
= data
[0];
315 k1
*= c1
; k1
= rotl32
!15(k1
); k1
*= c2
; h1 ^
= k1
;
328 // ////////////////////////////////////////////////////////////////////////// //
330 * 64-bit implementation of fasthash
337 * 32-bit or 64-bit hash
339 usize
FFhashOf (const(void)[] buf
, usize seed
=0) /*pure*/ nothrow @trusted @nogc {
342 (cast(ulong)data
[1]<<8)|
343 (cast(ulong)data
[2]<<16)|
344 (cast(ulong)data
[3]<<24)|
345 (cast(ulong)data
[4]<<32)|
346 (cast(ulong)data
[5]<<40)|
347 (cast(ulong)data
[6]<<48)|
348 (cast(ulong)data
[7]<<56)
350 enum m
= 0x880355f21e6d1965UL
;
351 auto data
= cast(const(ubyte)*)buf
;
352 immutable len
= buf
.length
;
355 foreach (immutable _
; 0..len
/8) {
356 version(HasUnalignedOps
) {
358 t
= mixin(Get8Bytes
);
360 t
= *cast(ulong*)data
;
363 t
= mixin(Get8Bytes
);
367 t
*= 0x2127599bf4325c37UL
;
376 case 7: t ^
= cast(ulong)data
[6]<<48; goto case 6;
377 case 6: t ^
= cast(ulong)data
[5]<<40; goto case 5;
378 case 5: t ^
= cast(ulong)data
[4]<<32; goto case 4;
379 case 4: t ^
= cast(ulong)data
[3]<<24; goto case 3;
380 case 3: t ^
= cast(ulong)data
[2]<<16; goto case 2;
381 case 2: t ^
= cast(ulong)data
[1]<<8; goto case 1;
382 case 1: t ^
= cast(ulong)data
[0]; goto default;
385 t
*= 0x2127599bf4325c37UL
;
393 h
*= 0x2127599bf4325c37UL
;
395 static if (usize
.sizeof
== 4) {
397 // the following trick converts the 64-bit hashcode to Fermat
398 // residue, which shall retain information from both the higher
399 // and lower parts of hashcode.
400 return cast(usize
)(h
-(h
>>32));
407 // ////////////////////////////////////////////////////////////////////////// //
408 void doHash(T
) () if (is(T
== struct)) {
409 foreach (string s
; words
) {
417 // ////////////////////////////////////////////////////////////////////////// //
418 void doTest(T
) () if (is(T
== struct)) {
420 write(T
.stringof
, ": ");
421 auto stt
= clockMilli();
422 foreach (immutable _
; 0..Tries
) doHash
!T();
423 auto ett
= clockMilli()-stt
;
424 writeln(words
.length
*Tries
, " (", words
.length
, ", ", Tries
, " times) took ", ett
, " milliseconds");
428 // ////////////////////////////////////////////////////////////////////////// //
429 void doTestX(alias fn
) () {
431 write((&fn
).stringof
[2..$], ": ");
432 auto stt
= clockMilli();
433 foreach (immutable _
; 0..Tries
) {
434 foreach (string s
; words
) cast(void)fn(s
);
436 auto ett
= clockMilli()-stt
;
437 writeln(words
.length
*Tries
, " (", words
.length
, ", ", Tries
, " times) took ", ett
, " milliseconds");
441 // ////////////////////////////////////////////////////////////////////////// //
442 static assert(murHashX("Hello, world!", 1234) == 0xfaf6cdb3U
);
443 static assert(murHashX("Hello, world!", 4321) == 0xbf505788U
);
444 static assert(murHashX("xxxxxxxxxxxxxxxxxxxxxxxxxxxx", 1234) == 0x8905ac28U
);
445 static assert(murHashX("", 1234) == 0x0f2cc00bU
);
447 static assert(bytesHash3("Hello, world!", 1234) == 0xfaf6cdb3U
);
448 static assert(bytesHash3("Hello, world!", 4321) == 0xbf505788U
);
449 static assert(bytesHash3("xxxxxxxxxxxxxxxxxxxxxxxxxxxx", 1234) == 0x8905ac28U
);
450 static assert(bytesHash3("", 1234) == 0x0f2cc00bU
);
453 pragma(msg
, murHashX("Sample string"));
454 pragma(msg
, murHashX("Alice & Miriel"));
455 pragma(msg
, murHash32("Sample string"));
456 pragma(msg
, murHash32("Alice & Miriel"));
459 // ////////////////////////////////////////////////////////////////////////// //
461 assert(object
.hashOf("Hello, world!", 1234) == 0xfaf6cdb3U
);
462 //writeln(object.hashOf("Hello, world!", 1234) == 0xfaf6cdb3U);
470 doTestX
!bytesHash3();
473 foreach (string w
; words
) {
474 if (murHashX(w
) != murHash32(w
)) assert(0, "shit!");
477 writeln("checking murhashes...");
478 char[1024*8] buf
= void;
479 foreach (immutable _
; 0..100000) {
481 auto len
= uniform
!"[]"(0, buf
.length
);
482 foreach (ref char c
; buf
[0..len
]) c
= cast(char)(uniform
!"[]"(0, 255));
483 if (murHashX(buf
[0..len
]) != murHash32(buf
[0..len
])) assert(0, "shit!");