Merge remote-tracking branch 'origin/nasm-2.13.xx'
[nasm.git] / nasmlib / perfhash.pl
blob639b347fd9403a793fc3167918b8090609903a1e
1 #!/usr/bin/perl
2 ## --------------------------------------------------------------------------
3 ##
4 ## Copyright 1996-2017 The NASM Authors - All Rights Reserved
5 ## See the file AUTHORS included with the NASM distribution for
6 ## the specific copyright holders.
7 ##
8 ## Redistribution and use in source and binary forms, with or without
9 ## modification, are permitted provided that the following
10 ## conditions are met:
12 ## * Redistributions of source code must retain the above copyright
13 ## notice, this list of conditions and the following disclaimer.
14 ## * Redistributions in binary form must reproduce the above
15 ## copyright notice, this list of conditions and the following
16 ## disclaimer in the documentation and/or other materials provided
17 ## with the distribution.
19 ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
20 ## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 ## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22 ## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 ## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
24 ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 ## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 ## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 ## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 ## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
30 ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
31 ## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 ## --------------------------------------------------------------------------
36 # Generate a perfect hash for general case-insensitive string-to-enum
37 # lookup. This generates an enum and the corresponding hash, but
38 # relies on a common function to parse the hash.
40 # Usage:
41 # perfhash.pl h foohash.dat foohash.h (to generate C header)
42 # perfhash.pl c foohash.dat foohash.c (to generate C source)
45 use strict;
47 require 'phash.ph';
49 sub basename($) {
50 my($s) = @_;
51 $s =~ s/^.*[^-[:alnum:]_\.]//; # Remove path component as best we can
52 return $s;
55 sub intval($) {
56 my($s) = @_;
58 if ($s =~ /^0/) {
59 return oct($s); # Handles octal or hexadecimal
60 } elsif ($s =~ /^\-(0.*)$/) {
61 return -oct($1);
62 } else {
63 return $s + 0; # Forcibly convert to number
67 my($output, $infile, $outfile) = @ARGV;
68 my $me = basename($0);
70 # The following special things are allowed in the input file:
71 # #<space> or ; begins a comment
72 # #include filename
73 # #name str
74 # The name of the hash
75 # #prefix str
76 # Defines the prefix before enum
77 # #guard str
78 # Defines the header guard string
79 # #special str [= value]
80 # Generate an enum value without a corresponding string; not capitalized.
81 # #header str
82 # Indicates the name of the .h file to include from the .c file
83 # #errval str
84 # Define the value to be returned if a string is not found
85 # (defaults to -1). This can be any constant C expression,
86 # including one of the enum values.
88 # Regular lines are just str [= value]
90 # Enumeration is generated in the order listed in the file, just as in C
91 # specifying a value causes the values to increase by 1 from that point on
92 # unless specified.
94 my $name;
95 my $prefix;
96 my $guard;
97 my $hfile;
99 my %strings = ();
100 my %specials = ();
101 my $next_value = 0;
102 my $errval = '-1';
104 my @incstack = ();
105 my @filenames = ($infile);
106 my @linenums = (0);
107 my $dd = undef;
108 my $err = 0;
110 while (scalar(@filenames)) {
111 if (!defined($dd)) {
112 open($dd, '<', $filenames[-1])
113 or die "$0: cannot open: $filenames[-1]: $!\n";
116 my $line = <$dd>;
117 if (!defined($line)) {
118 close($dd);
119 $dd = pop @incstack;
120 pop @filenames;
121 pop @linenums;
122 next;
125 $linenums[-1]++;
127 chomp $line;
128 $line =~ s/\s*(|\;.*|\#\s.*|\#)$//; # Remove comments and trailing space
129 $line =~ s/^\s+//; # Remove leading space
130 if ($line eq '') {
131 # Do nothing
132 } elsif ($line =~ /^\#name\s+([[:alnum:]_]+)$/) {
133 $name = $1;
134 } elsif ($line =~ /^\#prefix\s+([[:alnum:]_]+)$/) {
135 $prefix = $1;
136 } elsif ($line =~ /^\#guard\s+([[:alnum:]_]+)$/) {
137 $guard = $1;
138 } elsif ($line =~ /^\#errval\s+(\S.*)$/) {
139 $errval = $1;
140 } elsif ($line =~ /^\#header\s+(\"(.+)\"|\S+)$/) {
141 $hfile = ($2 ne '') ? $2 : $1;
142 } elsif ($line =~ /^\#include\s+(\"(.+)\"|\S+)$/) {
143 push @incstack, $dd;
144 push @filenames, (($2 ne '') ? $2 : $1);
145 push @linenums, 0;
146 undef $dd; # Open a new file
147 } elsif ($line =~ /^(|\#special\s+)(\S+)\s*(|=\s*(\-?(0[Xx][[:xdigit:]]+|0[0-7]*|[0-9]+)))$/) {
148 $next_value = intval($4) if ($4 ne '');
149 if ($1 eq '') {
150 $strings{$2} = $next_value++;
151 } else {
152 $specials{$2} = $next_value++;
154 } else {
155 printf STDERR "%s:%d:%s syntax error: \"%s\"\n",
156 $filenames[-1], $linenums[-1],
157 (scalar(@incstack) == 1) ? '' : "(from $infile)", $line;
158 $err++;
162 exit 1 if ($err);
164 # Default name, prefix, and header guard name
165 if (!defined($name)) {
166 $name = basename($infile);
167 $name =~ s/(\..*)$//; # Strip extension, if any
169 if (!defined($prefix)) {
170 $prefix = "\U${name}\E_";
172 if (!defined($hfile)) {
173 $hfile = $outfile;
174 $hfile =~ s/\.c$/\.h/;
176 if (!defined($guard)) {
177 $guard = basename($hfile);
178 $guard =~ s/[^[:alnum:]_]/_/g;
179 $guard =~ s/__+/_/g;
180 $guard = "\U$guard";
183 # Verify input. We can't have more than one constant with the same
184 # enumeration value, nor the same enumeration string.
185 if (scalar(keys(%strings)) == 0) {
186 die "$0: $infile: no strings to hash!\n";
189 my %enums;
190 my %enumvals;
191 my %stringbyval;
192 my $max_enum;
193 my $tbllen = 0;
194 my $tbloffs;
195 foreach my $s (keys(%strings)) {
196 my $es = "${prefix}\U${s}";
197 $es =~ s/[^[:alnum:]_]/_/g;
198 $es =~ s/__+/_/g;
199 my $v = $strings{$s};
200 $stringbyval{$v} = $s;
201 if (defined($enums{$es})) {
202 printf STDERR "%s: string \"%s\" duplicates existing enum %s\n",
203 $infile, $s, $es;
204 $err++;
205 } else {
206 $enums{$es} = $v;
208 if (defined($enumvals{$v})) {
209 printf STDERR "%s: string \"%s\" duplicates existing enum constant %d\n", $v;
210 $err++;
211 } else {
212 $enumvals{$v} = $es;
214 $max_enum = $v if ($v > $max_enum || !defined($max_enum));
215 $tbloffs = $v if ($v < $tbloffs || !defined($tbloffs));
216 $tbllen = $v+1 if ($v >= $tbllen || !defined($tbllen));
218 foreach my $s (keys(%specials)) {
219 my $es = $prefix . $s; # No string mangling here
220 my $v = $specials{$s};
221 if (defined($enums{$es})) {
222 printf STDERR "%s: special \"%s\" duplicates existing enum %s\n",
223 $infile, $s, $es;
224 $err++;
225 } else {
226 $enums{$es} = $v;
228 if (defined ($enumvals{$v})) {
229 printf STDERR "%s: special \"%s\" duplicates existing enum constant %d\n", $v;
230 $err++;
231 } else {
232 $enumvals{$v} = $es;
234 $max_enum = $v if ($v > $max_enum || !defined($max_enum));
237 $tbllen -= $tbloffs;
238 if ($tbllen > 65536) {
239 printf STDERR "%s: span of enumeration values too large\n";
240 $err++;
243 exit 1 if ($err);
245 open(F, '>', $outfile)
246 or die "$0: cannot create: ${outfile}: $!\n";
248 if ($output eq 'h') {
249 print F "/*\n";
250 print F " * This file is generated from $infile\n";
251 print F " * by $me; do not edit.\n";
252 print F " */\n";
253 print F "\n";
255 print F "#ifndef $guard\n";
256 print F "#define $guard 1\n\n";
257 print F "#include \"perfhash.h\"\n\n";
259 my $c = '{';
260 $next_value = 0;
261 print F "enum ${name} ";
262 foreach my $v (sort { $a <=> $b } keys(%enumvals)) {
263 my $s = $enumvals{$v};
264 print F "$c\n $s";
265 print F " = $v" if ($v != $next_value);
266 $next_value = $v + 1;
267 $c = ',';
269 print F "\n};\n\n";
270 print F "extern const struct perfect_hash ${name}_hash;\n";
271 printf F "extern const char * const %s_tbl[%d];\n", $name, $tbllen;
273 print F "\nstatic inline enum ${name} ${name}_find(const char *str)\n";
274 print F "{\n";
275 print F " return perfhash_find(&${name}_hash, str);\n";
276 print F "}\n";
278 print F "\nstatic inline const char * ${name}_name(enum ${name} x)\n";
279 print F "{\n";
280 printf F " size_t ix = (size_t)x - (%d);\n", $tbloffs;
281 printf F " if (ix >= %d)\n", $tbllen;
282 print F " return NULL;\n";
283 print F " return ${name}_tbl[ix];\n";
284 print F "}\n";
286 print F "\nstatic inline const char * ${name}_dname(enum ${name} x)\n";
287 print F "{\n";
288 print F " const char *y = ${name}_name(x);\n";
289 print F " return y ? y : invalid_enum_str(x);\n";
290 print F "}\n";
292 print F "\n#endif /* $guard */\n";
293 } elsif ($output eq 'c') {
294 # The strings we hash must all be lower case, even if the string
295 # table doesn't contain them that way.
297 my %lcstrings;
298 foreach my $s (keys(%strings)) {
299 my $ls = "\L$s";
300 if (defined($lcstrings{$ls})) {
301 printf STDERR "%s: strings \"%s\" and \"%s\" differ only in case\n",
302 $infile, $s, $strings{$lcstrings{$s}};
303 } else {
304 $lcstrings{$ls} = $strings{$s} - $tbloffs;
308 my @hashinfo = gen_perfect_hash(\%lcstrings);
309 if (!@hashinfo) {
310 die "$0: no hash found\n";
313 # Paranoia...
314 verify_hash_table(\%lcstrings, \@hashinfo);
316 my ($n, $sv, $g) = @hashinfo;
318 die if ($n & ($n-1));
320 print F "/*\n";
321 print F " * This file is generated from $infile\n";
322 print F " * by $me; do not edit.\n";
323 print F " */\n";
324 print F "\n";
326 print F "#include \"$hfile\"\n\n";
328 printf F "const char * const %s_tbl[%d] = ", $name, $tbllen;
329 my $c = '{';
330 for (my $i = $tbloffs; $i < $tbloffs+$tbllen; $i++) {
331 printf F "%s\n %s", $c,
332 defined($stringbyval{$i}) ? '"'.$stringbyval{$i}.'"' : 'NULL';
333 $c = ',';
335 print F "\n};\n\n";
337 print F "#define UNUSED (65536/3)\n\n";
339 printf F "static const int16_t %s_hashvals[%d] = ", $name, $n*2;
340 $c = '{';
341 for (my $i = 0; $i < $n; $i++) {
342 my $h = ${$g}[$i*2+0];
343 print F "$c\n ", defined($h) ? $h : 'UNUSED';
344 $c = ',';
346 for (my $i = 0; $i < $n; $i++) {
347 my $h = ${$g}[$i*2+1];
348 print F "$c\n ", defined($h) ? $h : 'UNUSED';
349 $c = ',';
351 print F "\n};\n\n";
353 print F "const struct perfect_hash ${name}_hash = {\n";
354 printf F " UINT64_C(0x%08x%08x),\n", $$sv[0], $$sv[1]; # crcinit
355 printf F " UINT32_C(0x%x),\n", $n-1; # hashmask
356 printf F " UINT32_C(%u),\n", $tbllen; # tbllen
357 printf F " %d,\n", $tbloffs; # tbloffs
358 printf F " (%s),\n", $errval; # errval
359 printf F " ${name}_hashvals,\n"; # hashvals
360 printf F " ${name}_tbl\n"; # strings
361 print F "};\n";