1 /* Invisible Vector Library
2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3 of the License ONLY.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 module testhash
is aliced
;
21 import iv
.hash
.siphash
;
22 import iv
.hash
.murhash
;
23 import iv
.hash
.fasthash
;
27 version(use_custom_dict
) {
28 enum WordsFile
= "_00/zwords.txt";
30 enum WordsFile
= "/usr/share/dict/words";
38 import std
.stdio
: File
;
40 void addWord (const(char)[] w
) {
41 if (w
.length
== 0 || w
in words
) return;
45 foreach (auto line
; File(WordsFile
).byLine
) {
46 import std
.string
: strip
;
50 foreach (ref ch
; line
) if (ch
>= 'A' && ch
<= 'Z') ch
+= 32;
53 foreach (ref ch
; line
) if (ch
>= 'a' && ch
<= 'z') ch
-= 32;
56 if (line
[0] >= 'A' && line
[0] <= 'Z') {
64 void testHash(T
) (string name
, T hfn
)
65 if (isCallable
!T
&& (is(ReturnType
!T
== uint) ||
is(ReturnType
!T
== ulong)))
67 { import std
.stdio
: stdout
; stdout
.write(name
, " ... "); stdout
.flush(); }
68 alias HType
= ReturnType
!T
;
69 usize
[HType
] collisions
;
71 foreach (immutable w
; words
.byKey
) {
72 static if (__traits(compiles
, { auto h
= hfn(w
); })) {
77 if (auto cc
= h
in collisions
) {
84 usize maxCollision
= 0, elements
= 0, colls
= 0;
85 foreach (immutable c
; collisions
.byValue
) {
86 if (c
> maxCollision
) maxCollision
= c
;
88 if (c
> 1) colls
+= c
-1;
90 assert(maxCollision
>= 1);
92 if (maxCollision
== 1) {
94 writeln("perfect for ", words
.length
, " words!");
96 import std
.algorithm
: sort
;
99 writeln(maxCollision
-1, " max collisions for ", words
.length
, " words, ", colls
, " collisions total");
100 auto cols
= sort
!"a>b"(collisions
.values
).array
;
103 //assert(cols[0] > 1);
104 while (cols
[idx
] > 1) {
107 while (cols
[idx
] == c
) { ++count
; ++idx
; }
108 writeln(" ", count
, " collisions for ", c
, " times");
114 uint innerhash (const(void)[] kbuf
) @trusted nothrow @nogc {
116 if (kbuf
.length
== int.sizeof
) {
117 import core
.stdc
.string
: memcpy
;
118 memcpy(&res
, kbuf
.ptr
, res
.sizeof
);
122 foreach (immutable bt; cast(const(ubyte)[])kbuf
) res
= res
*31+bt;
123 return (res
*87767623)&int.max
;
127 uint outerhash (const(void)[] kbuf
) @trusted nothrow @nogc {
129 foreach_reverse (immutable bt; cast(const(ubyte)[])kbuf
) res
= res
*29+bt;
130 return (res
*5157883)&int.max
;
134 uint djb2 (const(char)[] str) {
136 auto data
= cast(const(ubyte)[])str;
137 foreach (ubyte c
; data
) hash
= ((hash
<< 5)+hash
)+c
; /* hash * 33 + c */
138 auto len
= str.length
;
140 hash
= ((hash
<< 5)+hash
)+(len
&0xff); /* hash * 33 + c */
147 uint djb2x (const(char)[] str) {
149 auto data
= cast(const(ubyte)[])str;
150 foreach (ubyte c
; data
) hash
= ((hash
<< 5)+hash
)^c
; /* hash * 33 ^ c */
151 auto len
= str.length
;
153 hash
= ((hash
<< 5)+hash
)^
(len
&0xff); /* hash * 33 ^ c */
160 uint sdbm (const(char)[] str) {
161 uint hash
= cast(uint)str.length
;
162 auto data
= cast(const(ubyte)[])str;
163 foreach (ubyte c
; data
) hash
= c
+ (hash
<< 6) + (hash
<< 16) - hash
;
168 uint loselose (const(char)[] str) {
170 auto data
= cast(const(ubyte)[])str;
171 foreach (ubyte c
; data
) hash
+= c
;
176 uint sfHash (const(char)[] str) {
177 return SFhashOf(str.ptr
, str.length
);
181 usize
SFhashOf( const (void)* buf
, usize len
, usize seed
= 0 ) @trusted pure nothrow
184 * This is Paul Hsieh's SuperFastHash algorithm, described here:
185 * http://www.azillionmonkeys.com/qed/hash.html
186 * It is protected by the following open source license:
187 * http://www.azillionmonkeys.com/qed/weblicense.html
189 static uint get16bits( const (ubyte)* x
) pure nothrow
191 // CTFE doesn't support casting ubyte* -> ushort*, so revert to
192 // per-byte access when in CTFE.
193 version( HasUnalignedOps
)
196 return *cast(ushort*) x
;
199 return ((cast(uint) x
[1]) << 8) + (cast(uint) x
[0]);
202 // NOTE: SuperFastHash normally starts with a zero hash value. The seed
203 // value was incorporated to allow chaining.
204 auto data
= cast(const (ubyte)*) buf
;
208 if( len
<= 0 || data
is null )
214 for( ; len
> 0; len
-- )
216 hash
+= get16bits( data
);
217 auto tmp
= (get16bits( data
+ 2 ) << 11) ^ hash
;
218 hash
= (hash
<< 16) ^ tmp
;
219 data
+= 2 * ushort.sizeof
;
225 case 3: hash
+= get16bits( data
);
227 hash ^
= data
[ushort.sizeof
] << 18;
230 case 2: hash
+= get16bits( data
);
234 case 1: hash
+= *data
;
242 /* Force "avalanching" of final 127 bits */
254 uint ffHash (const(char)[] str) {
255 return FFhashOf(str.ptr
, str.length
);
260 * 64-bit implementation of fasthash
267 * 32-bit or 64-bit hash
269 usize
FFhashOf (const(void)* buf
, usize len
, usize seed
=0) @trusted pure nothrow @nogc {
272 (cast(ulong)data
[1]<<8)|
273 (cast(ulong)data
[2]<<16)|
274 (cast(ulong)data
[3]<<24)|
275 (cast(ulong)data
[4]<<32)|
276 (cast(ulong)data
[5]<<40)|
277 (cast(ulong)data
[6]<<48)|
278 (cast(ulong)data
[7]<<56)
280 enum m
= 0x880355f21e6d1965UL
;
281 auto data
= cast(const(ubyte)*)buf
;
284 foreach (immutable _
; 0..len
/8) {
285 version(HasUnalignedOps
) {
287 t
= mixin(Get8Bytes
);
289 t
= *cast(ulong*)data
;
292 t
= mixin(Get8Bytes
);
296 t
*= 0x2127599bf4325c37UL
;
305 case 7: t ^
= cast(ulong)data
[6]<<48; goto case 6;
306 case 6: t ^
= cast(ulong)data
[5]<<40; goto case 5;
307 case 5: t ^
= cast(ulong)data
[4]<<32; goto case 4;
308 case 4: t ^
= cast(ulong)data
[3]<<24; goto case 3;
309 case 3: t ^
= cast(ulong)data
[2]<<16; goto case 2;
310 case 2: t ^
= cast(ulong)data
[1]<<8; goto case 1;
311 case 1: t ^
= cast(ulong)data
[0]; goto default;
314 t
*= 0x2127599bf4325c37UL
;
322 h
*= 0x2127599bf4325c37UL
;
324 static if (usize
.sizeof
== 4) {
326 // the following trick converts the 64-bit hashcode to Fermat
327 // residue, which shall retain information from both the higher
328 // and lower parts of hashcode.
329 return cast(usize
)(h
-(h
>>32));
336 uint jenkins_one_at_a_time_hash (const(void)* key
, usize len
) {
338 auto data
= cast(const(ubyte)*)key
;
339 foreach (immutable _
; 0..len
) {
358 uint joaatHash (const(char)[] str) {
359 return jenkins_one_at_a_time_hash(str.ptr
, str.length
);
364 testHash("fastHash64", &fastHash64
!char);
365 testHash("fastHash32", &fastHash32
!char);
366 testHash("siphash24", (const(char)[] buf
) => sipHash24Of("0123456789abcdef", buf
));
367 testHash("murhash", &murHash32
!char);
368 testHash("innerhash", &innerhash
);
369 testHash("outerhash", &outerhash
);
370 testHash("djb2", &djb2
);
371 testHash("djb2x", &djb2x
);
372 testHash("sdbm", &sdbm
);
373 testHash("sfHash", &sfHash
);
374 testHash("ffHash", &ffHash
);
375 testHash("joaatHash0", &joaatHash
);
376 testHash("joaatHash1", &joaatHash32
!char);
377 //testHash("loselose", &loselose);