3 # Perfect Minimal Hash Generator written in Perl, which produces
6 # Requires the CPAN Graph module (tested against 0.81, 0.83, 0.84)
10 require 'random_sv_vectors.ph';
14 # Compute the prehash for a key
19 my($key, $n, $sv) = @_;
20 my @c = crc64($sv, $key);
22 # Create a bipartite graph...
23 $k1 = (($c[1] & ($n-1)) << 1) + 0; # low word
24 $k2 = (($c[0] & ($n-1)) << 1) + 1; # high word
30 # Walk the assignment graph
36 # print STDERR "Vertex $n value $v\n";
37 $gr->set_vertex_attribute($n,"val",$v);
39 foreach $nx ($gr->neighbors($n)) {
40 die unless ($gr->has_edge_attribute($n, $nx, "hash"));
41 my $e = $gr->get_edge_attribute($n, $nx, "hash");
43 # print STDERR "Edge $n=$nx value $e: ";
45 if ($gr->has_vertex_attribute($nx, "val")) {
46 die if ($v+$gr->get_vertex_attribute($nx, "val") != $e);
47 # print STDERR "ok\n";
49 walk_graph($gr, $nx, $e-$v);
55 # Generate the function assuming a given N.
57 # gen_hash_n(N, sv, \%data, run)
59 sub gen_hash_n($$$$) {
60 my($n, $sv, $href, $run) = @_;
61 my @keys = keys(%{$href});
67 $gr = Graph::Undirected->new;
68 for ($i = 0; $i < $gsize; $i++) {
73 my ($pf1, $pf2) = prehash($k, $n, $sv);
76 if ($gr->has_edge($pf1, $pf2)) {
77 my $xkey = $gr->get_edge_attribute($pf1, $pf2, "key");
78 my ($xp1, $xp2) = prehash($xkey, $n, $sv);
80 print STDERR "$run: Collision: $pf1=$pf2 $k with ";
81 print STDERR "$xkey ($xp1,$xp2)\n";
86 # print STDERR "Edge $pf1=$pf2 value $e from $k\n";
88 $gr->add_edge($pf1, $pf2);
89 $gr->set_edge_attribute($pf1, $pf2, "hash", $e);
90 $gr->set_edge_attribute($pf1, $pf2, "key", $k);
93 # At this point, we're good if the graph is acyclic.
96 print STDERR "$run: Graph is cyclic\n";
102 print STDERR "$run: Graph OK, computing vertices...\n";
105 # Now we need to assign values to each vertex, so that for each
106 # edge, the sum of the values for the two vertices give the value
107 # for the edge (which is our hash index.) Since the graph is
108 # acyclic, this is always doable.
109 for ($i = 0; $i < $gsize; $i++) {
110 if ($gr->degree($i)) {
111 # This vertex has neighbors (is used)
112 if (!$gr->has_vertex_attribute($i, "val")) {
113 walk_graph($gr,$i,0); # First vertex in a cluster
115 push(@g, $gr->get_vertex_attribute($i, "val"));
122 # for ($i = 0; $i < $n; $i++) {
123 # print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
127 printf STDERR "$run: Done: n = $n, sv = [0x%08x, 0x%08x]\n",
131 return ($n, $sv, \@g);
135 # Driver for generating the function
137 # gen_perfect_hash(\%data)
139 sub gen_perfect_hash($) {
141 my @keys = keys(%{$href});
143 my $n, $i, $j, $sv, $maxj;
146 # Minimal power of 2 value for N with enough wiggle room.
147 # The scaling constant must be larger than 0.5 in order for the
148 # algorithm to ever terminate.
149 my $room = scalar(@keys)*0.8;
155 # Number of times to try...
156 $maxj = scalar @random_sv_vectors;
158 for ($i = 0; $i < 4; $i++) {
159 printf STDERR "%d vectors, trying n = %d...\n",
161 for ($j = 0; $j < $maxj; $j++) {
162 $sv = $random_sv_vectors[$j];
163 @hashinfo = gen_hash_n($n, $sv, $href, $run++);
164 return @hashinfo if (defined(@hashinfo));
180 while (defined($l = <STDIN>)) {
182 $l =~ s/\s*(\#.*|)$//;
186 if ($l =~ /^([^=]+)\=([^=]+)$/) {
199 # Verify that the hash table is actually correct...
201 sub verify_hash_table($$)
203 my ($href, $hashinfo) = @_;
204 my ($n, $sv, $g) = @{$hashinfo};
208 foreach $k (keys(%$href)) {
209 my ($pf1, $pf2) = prehash($k, $n, $sv);
210 my $g1 = ${$g}[$pf1];
211 my $g2 = ${$g}[$pf2];
213 if ($g1+$g2 != ${$href}{$k}) {
214 printf STDERR "%s(%d,%d): %d+%d = %d != %d\n",
215 $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k};
218 # printf STDERR "%s: %d+%d = %d ok\n",
219 # $k, $g1, $g2, $g1+$g2;
223 die "$0: hash validation error\n" if ($err);