3 # Perfect Minimal Hash Generator written in Perl, which produces
6 # Requires the CPAN Graph module (tested against 0.81, 0.83, 0.84)
10 require 'random_sv_vectors.ph';
13 # Truncate to 32-bit integer
18 return int($x) % 4294967296;
28 return int32(($v << $s)|($v >> (32-$s)));
32 # Compute the prehash for a key
37 my($key, $n, $sv) = @_;
41 my($s0, $s1, $s2, $s3) = @{$sv};
43 foreach $c (unpack("C*", $key)) {
44 $ko1 = $k1; $ko2 = $k2;
45 $k1 = int32(rot($ko1,$s0)^int32(rot($ko2, $s1)+$c));
46 $k2 = int32(rot($ko2,$s2)^int32(rot($ko1, $s3)+$c));
49 # Create a bipartite graph...
50 $k1 = (($k1 & ($n-1)) << 1) + 0;
51 $k2 = (($k2 & ($n-1)) << 1) + 1;
57 # Walk the assignment graph
63 # print STDERR "Vertex $n value $v\n";
64 $gr->set_vertex_attribute($n,"val",$v);
66 foreach $nx ($gr->neighbors($n)) {
67 die unless ($gr->has_edge_attribute($n, $nx, "hash"));
68 my $e = $gr->get_edge_attribute($n, $nx, "hash");
70 # print STDERR "Edge $n=$nx value $e: ";
72 if ($gr->has_vertex_attribute($nx, "val")) {
73 die if ($v+$gr->get_vertex_attribute($nx, "val") != $e);
74 # print STDERR "ok\n";
76 walk_graph($gr, $nx, $e-$v);
82 # Generate the function assuming a given N.
84 # gen_hash_n(N, sv, \%data)
87 my($n, $sv, $href) = @_;
88 my @keys = keys(%{$href});
94 $gr = Graph::Undirected->new;
95 for ($i = 0; $i < $gsize; $i++) {
100 my ($pf1, $pf2) = prehash($k, $n, $sv);
101 my $e = ${$href}{$k};
103 if ($gr->has_edge($pf1, $pf2)) {
104 my $xkey = $gr->get_edge_attribute($pf1, $pf2, "key");
105 my ($xp1, $xp2) = prehash($xkey, $n, $sv);
106 print STDERR "Collision: $pf1=$pf2 $k with ";
107 print STDERR "$xkey ($xp1,$xp2)\n";
111 # print STDERR "Edge $pf1=$pf2 value $e from $k\n";
113 $gr->add_edge($pf1, $pf2);
114 $gr->set_edge_attribute($pf1, $pf2, "hash", $e);
115 $gr->set_edge_attribute($pf1, $pf2, "key", $k);
118 # At this point, we're good if the graph is acyclic.
119 if ($gr->is_cyclic) {
120 print STDERR "Graph is cyclic\n";
124 print STDERR "Graph OK, computing vertices...\n";
126 # Now we need to assign values to each vertex, so that for each
127 # edge, the sum of the values for the two vertices give the value
128 # for the edge (which is our hash index.) Since the graph is
129 # acyclic, this is always doable.
130 for ($i = 0; $i < $gsize; $i++) {
131 if ($gr->degree($i)) {
132 # This vertex has neighbors (is used)
133 if (!$gr->has_vertex_attribute($i, "val")) {
134 walk_graph($gr,$i,0); # First vertex in a cluster
136 push(@g, $gr->get_vertex_attribute($i, "val"));
143 # for ($i = 0; $i < $n; $i++) {
144 # print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
147 print STDERR "Done: n = $n, sv = [", join(',', @$sv), "]\n";
149 return ($n, $sv, \@g);
153 # Driver for generating the function
155 # gen_perfect_hash(\%data)
157 sub gen_perfect_hash($) {
159 my @keys = keys(%{$href});
161 my $n, $i, $j, $sv, $maxj;
163 # Minimal power of 2 value for N with enough wiggle room.
164 # The scaling constant must be larger than 0.5 in order for the
165 # algorithm to ever terminate.
166 my $room = scalar(@keys)*0.7;
172 # Number of times to try...
173 $maxj = scalar @random_sv_vectors;
175 for ($i = 0; $i < 4; $i++) {
176 print STDERR "Trying n = $n...\n";
177 for ($j = 0; $j < $maxj; $j++) {
178 $sv = $random_sv_vectors[$j];
179 @hashinfo = gen_hash_n($n, $sv, $href);
180 return @hashinfo if (defined(@hashinfo));
197 while (defined($l = <STDIN>)) {
199 $l =~ s/\s*(\#.*|)$//;
203 if ($l =~ /^([^=]+)\=([^=]+)$/) {
216 # Verify that the hash table is actually correct...
218 sub verify_hash_table($$)
220 my ($href, $hashinfo) = @_;
221 my ($n, $sv, $g) = @{$hashinfo};
225 foreach $k (keys(%$href)) {
226 my ($pf1, $pf2) = prehash($k, $n, $sv);
227 my $g1 = ${$g}[$pf1];
228 my $g2 = ${$g}[$pf2];
230 if ($g1+$g2 != ${$href}{$k}) {
231 printf STDERR "%s(%d,%d): %d+%d = %d != %d\n",
232 $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k};
235 # printf STDERR "%s: %d+%d = %d ok\n",
236 # $k, $g1, $g2, $g1+$g2;
240 die "$0: hash validation error\n" if ($err);