Initial commit
[cgperf.git] / output.c
blobce761176a0e5830285d4ef3a585a01816daa579b
1 #ifndef CGPERF_OUTPUT_C
2 #define CGPERF_OUTPUT_C
3 #include <stdbool.h>
4 #include "c_fixing.h"
5 #include "globals.h"
6 #include "options.h"
7 #include "output.h"
8 #include "keyword.h"
9 #include "keyword_list.h"
10 #include "positions.h"
11 /*------------------------------------------------------------------------------------------------*/
12 #include "namespace/globals.h"
13 #include "namespace/options.h"
14 #include "namespace/output.h"
15 #include "namespace/output.c"
16 #include "namespace/keyword.h"
17 #include "namespace/keyword_list.h"
18 #include "namespace/positions.h"
19 /*------------------------------------------------------------------------------------------------*/
20 /* We use a downcase table because when called repeatedly, the code gperf_downcase[c]
21 is faster than
22 if (c >= 'A' && c <= 'Z')
23 c += 'a' - 'A';
25 #define USE_DOWNCASE_TABLE 1
26 /*------------------------------------------------------------------------------------------------*/
27 /*{{{ local */
28 /*{{{ types */
30 * because of the way output_keyword_table works, every duplicate set is
31 * stored contiguously in the wordlist array
33 struct Duplicate_Entry
35 s32 hash_value; /* hash value for this particular duplicate set */
36 s32 index; /* index into the main keyword storage array */
37 s32 count; /* number of consecutive duplicates at this index */
39 /*}}}*/
40 /*{{{ variables */
41 /* the "register " storage-class specifier */
42 static u8 *register_scs;
43 /* the "const " qualifier */
44 static u8 *const_always;
45 /* the "const " qualifier, for read-only arrays */
46 static u8 *const_readonly_array;
47 /* the "const " qualifier, for the array type */
48 static u8 *const_for_struct;
49 /*}}} variables -- END */
50 /*{{{ code */
51 /*{{{ output_string */
53 * Outputs a keyword, as a string: enclosed in double quotes, escaping backslashes, double quote and
54 * unprintable characters
56 static void output_string(u8 *key, s32 len)
58 putchar('"');
59 loop {
60 u8 c;
62 if (len <= 0)
63 break;
64 c = (u8)(*key++);
65 if (isprint(c)) {
66 if (c == '"' || c == '\\')
67 putchar('\\');
68 putchar(c);
69 } else {
71 * Use octal escapes, not hexadecimal escapes, because some old C compilers
72 * didn't understand hexadecimal escapes, and because hexadecimal escapes
73 * are not limited to 2 digits, thus needing special care if the following
74 * character happens to be a digit.
76 putchar('\\');
77 putchar('0' + ((c >> 6) & 7));
78 putchar('0' + ((c >> 3) & 7));
79 putchar('0' + (c & 7));
81 len--;
83 putchar('"');
84 }/*}}}*/
85 /*{{{ output_line_directive */
86 /* outputs a #line directive, referring to the given line number */
87 static void output_line_directive(u32 lineno)
89 u8 *file_name;
91 file_name = options->input_file_name;
92 if (file_name != 0) {
93 printf("#line %u ", lineno);
94 output_string(file_name, (s32)strlen(file_name));
95 printf("\n");
97 }/*}}}*/
98 /*{{{ output_constant_define */
99 static void output_constant_define(u8 *name, s32 value)
101 u8 *prefix;
102 u8 *combined_name;
104 prefix = options->constants_prefix;
105 combined_name = calloc(strlen(prefix) + strlen(name) + 1, sizeof(u8));
106 strcpy(combined_name, prefix);
107 strcpy(combined_name + strlen(prefix), name);
108 printf("#define %s %d\n", combined_name, value);
109 free(combined_name);
110 }/*}}}*/
111 /*{{{ output_constant_enum */
112 static void output_constant_enum(u8 *name, s32 value, u8 *indentation, bool *pending_comma)
114 u8 *prefix;
115 u8 *combined_name;
117 prefix = options->constants_prefix;
118 combined_name = calloc(strlen(prefix) + strlen(name) + 1, sizeof(u8));
119 strcpy(combined_name, prefix);
120 strcpy(combined_name + strlen(prefix), name);
121 if (*pending_comma)
122 printf (",\n");
123 printf("%s %s = %d", indentation, combined_name, value);
124 *pending_comma = true;
125 free(combined_name);
126 }/*}}}*/
127 /*{{{ ouput_upperlower_table */
128 #if USE_DOWNCASE_TABLE
129 static void output_upperlower_table(void)
131 u32 c;
133 printf(
134 "#ifndef GPERF_DOWNCASE\n"
135 "#define GPERF_DOWNCASE 1\n"
136 "static %sunsigned char gperf_downcase[256] =\n"
137 " {",
138 const_readonly_array);
139 c = 0;
140 loop {
141 if (c >= 256)
142 break;
143 if ((c % 15) == 0)
144 printf("\n ");
145 printf(" %3d", c >= 'A' && c <= 'Z' ? c + 'a' - 'A' : c);
146 if (c < 255)
147 printf (",");
148 ++c;
150 printf("\n"
151 " };\n"
152 "#endif\n\n");
154 #endif
155 /*}}}*/
156 /*{{{ output_upperlower_memcmp */
157 /* output gperf's ASCII-case insensitive memcmp replacement */
158 static void output_upperlower_memcmp(void)
160 printf(
161 "#ifndef GPERF_CASE_MEMCMP\n"
162 "#define GPERF_CASE_MEMCMP 1\n"
163 "static int\n"
164 "gperf_case_memcmp ");
165 printf(OPTS(KRC) ? "(s1, s2, n)\n"
166 " %schar *s1;\n"
167 " %schar *s2;\n"
168 " %ssize_t n;\n" :
169 OPTS(C) ? "(s1, s2, n)\n"
170 " %sconst char *s1;\n"
171 " %sconst char *s2;\n"
172 " %ssize_t n;\n" :
173 OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *s1, %sconst char *s2, %ssize_t n)\n" :
174 "", register_scs, register_scs, register_scs);
175 #if USE_DOWNCASE_TABLE
176 printf(
177 "{\n"
178 " for (; n > 0;)\n"
179 " {\n"
180 " unsiGNED char c1 = gperf_downcase[(unsigned char)*s1++];\n"
181 " unsigned char c2 = gperf_downcase[(unsigned char)*s2++];\n"
182 " if (c1 == c2)\n"
183 " {\n"
184 " n--;\n"
185 " continue;\n"
186 " }\n"
187 " return (int)c1 - (int)c2;\n"
188 " }\n"
189 " return 0;\n"
190 "}\n");
191 #else
192 printf(
193 "{\n"
194 " for (; n > 0;)\n"
195 " {\n"
196 " unsigned char c1 = *s1++;\n"
197 " unsigned char c2 = *s2++;\n"
198 " if (c1 >= 'A' && c1 <= 'Z')\n"
199 " c1 += 'a' - 'A';\n"
200 " if (c2 >= 'A' && c2 <= 'Z')\n"
201 " c2 += 'a' - 'A';\n"
202 " if (c1 == c2)\n"
203 " {\n"
204 " n--;\n"
205 " continue;\n"
206 " }\n"
207 " return (int)c1 - (int)c2;\n"
208 " }\n"
209 " return 0;\n"
210 "}\n");
211 #endif
212 printf(
213 "#endif\n\n");
214 }/*}}}*/
215 /*{{{ output_upperlower_strncmp */
216 /* output gperf's ASCII-case insensitive strncmp replacement */
217 static void output_upperlower_strncmp(void)
219 printf(
220 "#ifndef GPERF_CASE_STRNCMP\n"
221 "#define GPERF_CASE_STRNCMP 1\n"
222 "static int\n"
223 "gperf_case_strncmp ");
224 printf(OPTS(KRC) ? "(s1, s2, n)\n"
225 " %schar *s1;\n"
226 " %schar *s2;\n"
227 " %ssize_t n;\n" :
228 OPTS(C) ? "(s1, s2, n)\n"
229 " %sconst char *s1;\n"
230 " %sconst char *s2;\n"
231 " %ssize_t n;\n" :
232 OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *s1, %sconst char *s2, %ssize_t n)\n" :
233 "", register_scs, register_scs, register_scs);
234 #if USE_DOWNCASE_TABLE
235 printf(
236 "{\n"
237 " for (; n > 0;)\n"
238 " {\n"
239 " unsigned char c1 = gperf_downcase[(unsigned char)*s1++];\n"
240 " unsigned char c2 = gperf_downcase[(unsigned char)*s2++];\n"
241 " if (c1 != 0 && c1 == c2)\n"
242 " {\n"
243 " n--;\n"
244 " continue;\n"
245 " }\n"
246 " return (int)c1 - (int)c2;\n"
247 " }\n"
248 " return 0;\n"
249 "}\n");
250 #else
251 printf(
252 "{\n"
253 " for (; n > 0;)\n"
254 " {\n"
255 " unsigned char c1 = *s1++;\n"
256 " unsigned char c2 = *s2++;\n"
257 " if (c1 >= 'A' && c1 <= 'Z')\n"
258 " c1 += 'a' - 'A';\n"
259 " if (c2 >= 'A' && c2 <= 'Z')\n"
260 " c2 += 'a' - 'A';\n"
261 " if (c1 != 0 && c1 == c2)\n"
262 " {\n"
263 " n--;\n"
264 " continue;\n"
265 " }\n"
266 " return (int)c1 - (int)c2;\n"
267 " }\n"
268 " return 0;\n"
269 "}\n");
270 #endif
271 printf(
272 "#endif\n\n");
273 }/*}}}*/
274 /*{{{ output_upperlower_strcmp */
275 /* output gperf's ASCII-case insensitive strcmp replacement */
276 static void output_upperlower_strcmp(void)
278 printf(
279 "#ifndef GPERF_CASE_STRCMP\n"
280 "#define GPERF_CASE_STRCMP 1\n"
281 "static int\n"
282 "gperf_case_strcmp ");
283 printf(OPTS(KRC) ? "(s1, s2)\n"
284 " %schar *s1;\n"
285 " %schar *s2;\n" :
286 OPTS(C) ? "(s1, s2)\n"
287 " %sconst char *s1;\n"
288 " %sconst char *s2;\n" :
289 OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *s1, %sconst char *s2)\n" :
290 "", register_scs, register_scs);
291 #if USE_DOWNCASE_TABLE
292 printf(
293 "{\n"
294 " for (;;)\n"
295 " {\n"
296 " unsigned char c1 = gperf_downcase[(unsigned char)*s1++];\n"
297 " unsigned char c2 = gperf_downcase[(unsigned char)*s2++];\n"
298 " if (c1 != 0 && c1 == c2)\n"
299 " continue;\n"
300 " return (int)c1 - (int)c2;\n"
301 " }\n"
302 "}\n");
303 #else
304 printf(
305 "{\n"
306 " for (;;)\n"
307 " {\n"
308 " unsigned char c1 = *s1++;\n"
309 " unsigned char c2 = *s2++;\n"
310 " if (c1 >= 'A' && c1 <= 'Z')\n"
311 " c1 += 'a' - 'A';\n"
312 " if (c2 >= 'A' && c2 <= 'Z')\n"
313 " c2 += 'a' - 'A';\n"
314 " if (c1 != 0 && c1 == c2)\n"
315 " continue;\n"
316 " return (int)c1 - (int)c2;\n"
317 " }\n"
318 "}\n");
319 #endif
320 printf
321 ("#endif\n\n");
322 }/*}}}*/
323 /*{{{ smallest_integral_type */
324 /* returns the smallest unsigned C type capable of holding integers up to N */
325 static u8 *smallest_integral_type(s32 n)
327 if (n <= UCHAR_MAX) return "unsigned char";
328 if (n <= USHRT_MAX) return "unsigned short";
329 return "unsigned int";
330 }/*}}}*/
331 /*{{{ smallest_integral_type_2 */
332 /* returns the smallest signed C type capable of holding integers from MIN to MAX */
333 static u8 *smallest_integral_type_2(s32 min, s32 max)
335 if (OPTS(ANSIC) || OPTS(CPLUSPLUS))
336 if (min >= SCHAR_MIN && max <= SCHAR_MAX) return "signed char";
337 if (min >= SHRT_MIN && max <= SHRT_MAX) return "short";
338 return "int";
339 }/*}}}*/
340 /*{{{ output_const_type */
342 * Outputs a type and a const specifier (i.e. "const " or "").
343 * The output is terminated with a space.
345 static void output_const_type(u8 *const_string, u8 *type_string)
347 if (type_string[strlen(type_string) - 1] == '*')
348 /* for pointer types, put the 'const' after the type */
349 printf(
350 "%s %s", type_string, const_string);
351 else
352 /* for scalar or struct types, put the 'const' before the type */
353 printf(
354 "%s%s ", const_string, type_string);
355 }/*}}}*/
356 /*{{{ output_keyword_blank_entries */
357 static void output_keyword_blank_entries(s32 count, u8 *indent)
359 s32 columns;
360 s32 column;
361 s32 i;
363 if (OPTS(TYPE)) {
364 columns = 58 / (4 + (OPTS(SHAREDLIB) ? 2 : OPTS(NULLSTRINGS) ? 8 : 2)
365 + strlen(options->initializer_suffix));
366 if (columns == 0)
367 columns = 1;
368 } else
369 columns = (OPTS(SHAREDLIB) ? 9 : OPTS(NULLSTRINGS) ? 4 : 9);
370 column = 0;
371 i = 0;
372 loop {
373 if (i >= count)
374 break;
375 if ((column % columns) == 0) {
376 if (i > 0)
377 printf(
378 ",\n");
379 printf(
380 "%s ", indent);
381 } else if (i > 0)
382 printf(", ");
383 if (OPTS(TYPE))
384 printf("{");
385 if (OPTS(SHAREDLIB))
386 printf("-1");
387 else {
388 if (OPTS(NULLSTRINGS))
389 printf("(char*)0");
390 else
391 printf("\"\"");
393 if (OPTS(TYPE))
394 printf(
395 "%s}", options->initializer_suffix);
396 ++column;
397 ++i;
399 }/*}}}*/
400 /*{{{ output_keyword_entry */
401 static void output_keyword_entry(struct Keyword *tmp, s32 stringpool_index, u8 *indent,
402 bool is_duplicate)
404 if (OPTS(TYPE))
405 output_line_directive(tmp->lineno);
406 printf(
407 "%s ", indent);
408 if (OPTS(TYPE))
409 printf("{");
410 if (OPTS(SHAREDLIB))
412 * How to determine a certain offset in stringpool at compile time?
413 * - The standard way would be to use the 'offsetof' macro. But it is only
414 * defined in <stddef.h>, and <stddef.h> is not among the prerequisite
415 * header files that the user must #include.
416 * - The next best way would be to take the address and cast to 'intptr_t'
417 * or 'uintptr_t'. But these types are only defined in <stdint.h>, and
418 * <stdint.h> is not among the prerequisite header files that the user
419 * must #include.
420 * - The next best approximation of 'uintptr_t' is 'size_t'. It is defined
421 * in the prerequisite header <string.h>.
422 * - The types 'long' and 'unsigned long' do work as well, but on 64-bit
423 * native Windows platforms, they don't have the same size as pointers
424 * and therefore generate warnings.
426 printf("(int)(size_t)&((struct %s_t *)0)->%s_str%d",
427 options->stringpool_name, options->stringpool_name, stringpool_index);
428 else
429 output_string(tmp->allchars, tmp->allchars_length);
430 if (OPTS(TYPE)) {
431 if (strlen(tmp->rest) > 0)
432 printf(",%s", tmp->rest);
433 printf("}");
435 if (OPTS(DEBUG)) {
436 printf(" /* ");
437 if (is_duplicate)
438 printf("hash value duplicate, ");
439 else
440 printf("hash value = %d, ", tmp->hash_value);
441 printf("index = %d */", tmp->final_index);
444 }/*}}}*/
445 /*{{{ output_switch_case */
446 /* Output a single switch case (including duplicates). Advance list. */
447 static struct Keyword_List *output_switch_case(struct Keyword_List *list, s32 indent,
448 s32 *jumps_away)
450 if (OPTS(DEBUG))
451 printf(
452 "%*s/* hash value = %4d, keyword = \"%.*s\" */\n", indent, "", list->kw->hash_value,
453 list->kw->allchars_length, list->kw->allchars);
454 if (OPTS(DUP) && list->kw->duplicate_link) {
455 s32 count;
456 struct Keyword *links;
458 if (OPTS(LENTABLE))
459 printf(
460 "%*slengthptr = &%s[%d];\n", indent, "", options->lengthtable_name, list->kw->final_index);
461 printf(
462 "%*swordptr = &%s[%d];\n", indent, "", options->wordlist_name, list->kw->final_index);
463 count = 0;
464 links = list->kw;
465 loop {
466 if (links == 0)
467 break;
468 ++count;
469 links = links->duplicate_link;
471 printf(
472 "%*swordendptr = wordptr + %d;\n"
473 "%*sgoto multicompare;\n", indent, "", count, indent, "");
474 *jumps_away = 1;
475 } else {
476 if (OPTS(LENTABLE)) {
477 printf(
478 "%*sif (len == %d)\n"
479 "%*s {\n", indent, "", list->kw->allchars_length, indent, "");
480 indent += 4;
482 printf("%*sresword = ", indent, "");
483 if (OPTS(TYPE))
484 printf("&%s[%d]", options->wordlist_name, list->kw->final_index);
485 else
486 output_string(list->kw->allchars, list->kw->allchars_length);
487 printf(";\n");
488 printf(
489 "%*sgoto compare;\n", indent, "");
490 if (OPTS(LENTABLE)) {
491 indent -= 4;
492 printf(
493 "%*s }\n", indent, "");
494 } else
495 *jumps_away = 1;
497 return list->next;
498 }/*}}}*/
499 /*{{{ output_switches */
501 * output a total of size cases, grouped into num_switches switch statements, where 0 <
502 * num_switches <= size
504 static void output_switches(struct Keyword_List *list, s32 num_switches, s32 size,
505 s32 min_hash_value, s32 max_hash_value, s32 indent)
507 if (OPTS(DEBUG))
508 printf(
509 "%*s/* know %d <= key <= %d, contains %d cases */\n", indent, "", min_hash_value, max_hash_value,
510 size);
511 if (num_switches > 1) {
512 s32 part1;
513 s32 part2;
514 s32 size1;
515 s32 size2;
516 struct Keyword_List *tmp;
517 s32 count;
519 part1 = num_switches / 2;
520 part2 = num_switches - part1;
521 size1 = (s32)((f64)(size) / (f64)(num_switches) * (f64)(part1) + 0.5);
522 size2 = size - size1;
524 tmp = list;
525 count = size1;
526 loop {
527 if (count <= 0)
528 break;
529 tmp = tmp->next;
530 count--;
532 printf(
533 "%*sif (key < %d)\n"
534 "%*s {\n", indent, "", tmp->kw->hash_value, indent, "");
535 output_switches(list, part1, size1, min_hash_value, tmp->kw->hash_value - 1,
536 indent + 4);
537 printf(
538 "%*s }\n"
539 "%*selse\n"
540 "%*s {\n", indent, "", indent, "", indent, "");
541 output_switches(tmp, part2, size2, tmp->kw->hash_value, max_hash_value, indent + 4);
542 printf(
543 "%*s }\n", indent, "");
544 } else {
545 s32 lowest_case_value;
547 lowest_case_value = list->kw->hash_value;
548 if (size == 1) {
549 s32 jumps_away;
551 jumps_away = 0;
552 if (min_hash_value == max_hash_value)
553 output_switch_case(list, indent, &jumps_away);
554 else {
555 printf(
556 "%*sif (key == %d)\n"
557 "%*s {\n", indent, "", lowest_case_value, indent, "");
558 output_switch_case(list, indent + 4, &jumps_away);
559 printf(
560 "%*s }\n", indent, "");
562 } else {
563 if (lowest_case_value == 0)
564 printf(
565 "%*sswitch (key)\n", indent, "");
566 else
567 printf(
568 "%*sswitch (key - %d)\n", indent, "", lowest_case_value);
569 printf(
570 "%*s {\n", indent, "");
571 loop {
572 s32 jumps_away;
574 if (size <= 0)
575 break;
576 jumps_away = 0;
577 printf(
578 "%*s case %d:\n", indent, "", list->kw->hash_value - lowest_case_value);
579 list = output_switch_case(list, indent + 6, &jumps_away);
580 if (!jumps_away)
581 printf(
582 "%*s break;\n", indent, "");
583 size--;
585 printf(
586 "%*s }\n", indent, "");
589 }/*}}}*/
590 /*{{{ output_firstchar_comparison */
592 * Outputs the comparison expression for the first byte. Returns true if the this comparison is
593 * complete.
595 static bool output_firstchar_comparison(u8 *expr1, u8 *expr2)
598 * First, we emit a comparison of the first byte of the two strings. This catches most
599 * cases where the string being looked up is not in the hash table but happens to have the
600 * same hash code as an element of the hash table.
602 if (OPTS(UPPERLOWER)) {
603 /* incomplete comparison, just for speedup */
604 printf("(((unsigned char)*");
605 printf("%s", expr1);
606 printf(" ^ (unsigned char)*");
607 printf("%s", expr2);
608 printf(") & ~32) == 0");
609 return false;
611 /* Complete comparison. */
612 printf("*");
613 printf("%s", expr1);
614 printf(" == *");
615 printf("%s", expr2);
616 return true;
617 }/*}}}*/
618 /*------------------------------------------------------------------------------------------------*/
619 /*{{{ output_comparison_X */
621 * Outputs the comparison expression.
622 * expr1 outputs a simple expression of type 'const char *' referring to the string being looked up.
623 * expr2 outputs a simple expression of type 'const char *' referring to the constant string stored
624 * in the gperf generated hash table.
626 /*{{{ output_comparison_memcmp */
627 static void output_comparison_memcmp(u8 *expr1, u8 *expr2)
629 bool firstchar_done;
631 firstchar_done = output_firstchar_comparison(expr1, expr2);
632 printf(" && !");
633 if (OPTS(UPPERLOWER))
634 printf("gperf_case_");
635 printf("memcmp (");
636 if (firstchar_done) {
637 printf("%s", expr1);
638 printf(" + 1, ");
639 printf("%s", expr2);
640 printf(" + 1, len - 1");
641 } else {
642 printf("%s", expr1);
643 printf(", ");
644 printf("%s", expr2);
645 printf(", len");
647 printf(")");
648 }/*}}}*/
649 /*{{{ output_comparison_strncmp */
650 static void output_comparison_strncmp(u8 *expr1, u8 *expr2)
652 bool firstchar_done;
654 firstchar_done = output_firstchar_comparison(expr1, expr2);
655 printf(" && !");
656 if (OPTS(UPPERLOWER))
657 printf("gperf_case_");
658 printf("strncmp (");
659 if (firstchar_done) {
660 printf("%s", expr1);
661 printf(" + 1, ");
662 printf("%s", expr2);
663 printf(" + 1, len - 1");
664 } else {
665 printf("%s", expr1);
666 printf(", ");
667 printf("%s", expr2);
668 printf(", len");
670 printf(") && ");
671 printf("%s", expr2);
672 printf("[len] == '\\0'");
673 }/*}}}*/
674 /*{{{ output_comparison_strcmp */
675 static void output_comparison_strcmp(u8 *expr1, u8 *expr2)
677 bool firstchar_done;
679 firstchar_done = output_firstchar_comparison(expr1, expr2);
680 printf(" && !");
681 if (OPTS(UPPERLOWER))
682 printf("gperf_case_");
683 printf("strcmp (");
684 if (firstchar_done) {
685 printf("%s", expr1);
686 printf(" + 1, ");
687 printf("%s", expr2);
688 printf(" + 1");
689 } else {
690 printf("%s", expr1);
691 printf(", ");
692 printf("%s", expr2);
694 printf(")");
695 }/*}}}*/
696 /*}}} output_comparison_X -- END */
697 /*------------------------------------------------------------------------------------------------*/
698 /*}}} code -- END */
699 /*}}} local -- END */
700 /*------------------------------------------------------------------------------------------------*/
701 /*{{{ output_new */
702 /* Constructor.
703 Note about the keyword list starting at head:
704 - The list is ordered by increasing _hash_value. This has been achieved
705 by Search::sort().
706 - Duplicates, i.e. keywords with the same _selchars set, are chained
707 through the _duplicate_link pointer. Only one representative per
708 duplicate equivalence class remains on the linear keyword list.
709 - Accidental duplicates, i.e. keywords for which the _asso_values[] search
710 couldn't achieve different hash values, cannot occur on the linear
711 keyword list. Search::optimize would catch this mistake.
713 static struct Output *output_new(struct Keyword_List *head, u8 *struct_decl,
714 u32 struct_decl_lineno, u8 *return_type,
715 u8 *struct_tag, u8 *verbatim_declarations,
716 u8 *verbatim_declarations_end,
717 u32 verbatim_declarations_lineno,
718 u8 *verbatim_code, u8 *verbatim_code_end,
719 u32 verbatim_code_lineno, bool charset_dependent,
720 s32 total_keys, s32 max_key_len, s32 min_key_len,
721 bool hash_includes_len, struct Positions *positions,
722 u32 *alpha_inc, s32 total_duplicates,
723 u32 alpha_size, s32 *asso_values)
725 struct Output *t;
727 t = calloc(1, sizeof(*t));
728 t->head = head;
729 t->struct_decl = struct_decl;
730 t->struct_decl_lineno = struct_decl_lineno;
731 t->return_type = return_type;
732 t->struct_tag = struct_tag;
733 t->verbatim_declarations = verbatim_declarations;
734 t->verbatim_declarations_end = verbatim_declarations_end;
735 t->verbatim_declarations_lineno = verbatim_declarations_lineno;
736 t->verbatim_code = verbatim_code;
737 t->verbatim_code_end = verbatim_code_end;
738 t->verbatim_code_lineno = verbatim_code_lineno;
739 t->charset_dependent = charset_dependent;
740 t->total_keys = total_keys;
741 t->max_key_len = max_key_len;
742 t->min_key_len = min_key_len;
743 t->hash_includes_len = hash_includes_len;
744 t->key_positions = pos_new_cpy(positions);
745 t->alpha_inc = alpha_inc;
746 t->total_duplicates = total_duplicates;
747 t->alpha_size = alpha_size;
748 t->asso_values = asso_values;
749 return t;
750 }/*}}}*/
751 /*{{{ output_del */
752 static void output_del(struct Output *t)
754 pos_del(t->key_positions);
755 free(t);
757 /*}}}*/
758 /*{{{ output_do */
759 /* generates the hash function and the key word recognizer function based upon the user's Options */
760 static void output_do(struct Output *t)
762 output_compute_min_max(t);
763 if (OPTS(CPLUSPLUS)) /* yeah, we know nowadays that c++ is never a good idea anyway */
765 * The 'register' keyword is removed from C++17. See
766 * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4340
768 register_scs = "";
769 else
770 register_scs = "register ";
771 if (OPTS(C) || OPTS(ANSIC) || OPTS(CPLUSPLUS)) {
772 const_always = "const ";
773 const_readonly_array = (OPTS(CONST) ? "const " : "");
774 const_for_struct = ((OPTS(CONST) && OPTS(TYPE)) ? "const " : "" );
775 } else {
776 const_always = "";
777 const_readonly_array = "";
778 const_for_struct = "";
780 if (!OPTS(TYPE)) {
781 t->return_type = (const_always[0] != 0 ? "const char *" : "char *");
782 t->struct_tag = (const_always[0] != 0 ? "const char *" : "char *");
784 t->wordlist_eltype = (OPTS(SHAREDLIB) && !OPTS(TYPE) ? (u8*)"int" : t->struct_tag);
785 printf ("/* ");
786 if (OPTS(KRC))
787 printf("KR-C");
788 else if (OPTS(C))
789 printf("C");
790 else if (OPTS(ANSIC))
791 printf("ANSI-C");
792 else if (OPTS(CPLUSPLUS))
793 printf("C++");
794 printf(" code produced by gperf version %s */\n", cgperf_version_string);
795 opts_print(options);
796 printf("\n");
797 if (!OPTS(POSITIONS)) {
798 printf ("/* Computed positions: -k'");
799 pos_print(t->key_positions);
800 printf("' */\n");
802 printf("\n");
803 if (t->charset_dependent && (t->key_positions->size > 0 || OPTS(UPPERLOWER))) {
804 printf("#if !((' ' == 32) && ('!' == 33) && ('\"' == 34) && ('#' == 35) \\\n"
805 " && ('%%' == 37) && ('&' == 38) && ('\\'' == 39) && ('(' == 40) \\\n"
806 " && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \\\n"
807 " && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \\\n"
808 " && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \\\n"
809 " && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \\\n"
810 " && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \\\n"
811 " && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \\\n"
812 " && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \\\n"
813 " && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \\\n"
814 " && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \\\n"
815 " && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \\\n"
816 " && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \\\n"
817 " && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \\\n"
818 " && ('Z' == 90) && ('[' == 91) && ('\\\\' == 92) && (']' == 93) \\\n"
819 " && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \\\n"
820 " && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \\\n"
821 " && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \\\n"
822 " && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \\\n"
823 " && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \\\n"
824 " && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \\\n"
825 " && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \\\n"
826 " && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))\n"
827 "/* The character set is not based on ISO-646. */\n");
828 printf("%s \"gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>.\"\n", OPTS(KRC) || OPTS(C) ? "error" : "#error");
829 printf ("#endif\n\n");
831 if (t->verbatim_declarations < t->verbatim_declarations_end) {
832 output_line_directive(t->verbatim_declarations_lineno);
833 fwrite(t->verbatim_declarations, 1, t->verbatim_declarations_end -
834 t->verbatim_declarations, stdout);
836 if (OPTS(TYPE) && !OPTS(NOTYPE)) {
837 /* output type declaration now, reference it later on.... */
838 output_line_directive(t->struct_decl_lineno);
839 printf("%s\n", t->struct_decl);
841 if (OPTS(INCLUDE))
842 printf("#include <string.h>\n"); /* declare strlen(), strcmp(), strncmp() */
843 if (!OPTS(ENUM)) /* refactored: overzealous code factorization */
844 output_constants_defines(t);
845 else if (OPTS(GLOBAL))
846 output_constants_enum(t, "");
847 printf("/* maximum key range = %d, duplicates = %d */\n\n", t->max_hash_value - t->min_hash_value + 1, t->total_duplicates);
848 if (OPTS(UPPERLOWER)) {
849 #if USE_DOWNCASE_TABLE
850 output_upperlower_table();
851 #endif
852 if (OPTS(LENTABLE))
853 output_upperlower_memcmp();
854 else {
855 if (OPTS(COMP))
856 output_upperlower_strncmp();
857 else
858 output_upperlower_strcmp();
861 if (OPTS(CPLUSPLUS))
862 printf(
863 "class %s\n"
864 "{\n"
865 "private:\n"
866 " static inline unsigned int %s (const char *str, size_t len);\n"
867 "public:\n"
868 " static %s%s%s (const char *str, size_t len);\n"
869 "};\n"
870 "\n", options->class_name, options->hash_name, const_for_struct, t->return_type,
871 options->function_name);
872 output_hash_function(t);
873 if (OPTS(SHAREDLIB) && (OPTS(GLOBAL) || OPTS(TYPE)))
874 output_lookup_pools(t);
875 if (OPTS(GLOBAL))
876 output_lookup_tables(t);
877 output_lookup_function(t);
878 if (t->verbatim_code < t->verbatim_code_end) {
879 output_line_directive(t->verbatim_code_lineno);
880 fwrite(t->verbatim_code, 1, t->verbatim_code_end - t->verbatim_code, stdout);
882 fflush(stdout);
883 }/*}}}*/
884 /*{{{ output_compute_min_max */
885 static void output_compute_min_max(struct Output *t)
887 struct Keyword_List *tmp;
889 * since the list is already sorted by hash value all we need to do is to look at the first
890 * and the last element of the list
892 t->min_hash_value = t->head->kw->hash_value;
893 tmp = t->head;
894 loop {
895 if (tmp->next == 0)
896 break;
897 tmp = tmp->next;
899 t->max_hash_value = tmp->kw->hash_value;
900 }/*}}}*/
901 /*{{{ output_constants_defines */
902 static void output_constants_defines(struct Output *t)
904 printf("\n");
905 output_constant_define("TOTAL_KEYWORDS", t->total_keys);
906 output_constant_define("MIN_WORD_LENGTH", t->min_key_len);
907 output_constant_define("MAX_WORD_LENGTH", t->max_key_len);
908 output_constant_define("MIN_HASH_VALUE", t->min_hash_value);
909 output_constant_define("MAX_HASH_VALUE", t->max_hash_value);
910 }/*}}}*/
911 /*{{{ output_constants_enum */
912 static void output_constants_enum(struct Output *t, u8 *indentation)
914 bool pending_comma;
916 printf("%senum\n"
917 "%s {\n", indentation, indentation);
918 pending_comma = false;
919 output_constant_enum("TOTAL_KEYWORDS", t->total_keys, indentation, &pending_comma);
920 output_constant_enum("MIN_WORD_LENGTH", t->min_key_len, indentation, &pending_comma);
921 output_constant_enum("MAX_WORD_LENGTH", t->max_key_len, indentation, &pending_comma);
922 output_constant_enum("MIN_HASH_VALUE", t->min_hash_value, indentation, &pending_comma);
923 output_constant_enum("MAX_HASH_VALUE", t->max_hash_value, indentation, &pending_comma);
924 if (pending_comma)
925 printf("\n");
926 printf("%s };\n\n", indentation);
927 }/*}}}*/
928 /*{{{ output_hash_function */
929 /* Generates C code for the hash function that returns the
930 proper encoding for each keyword.
931 The hash function has the signature
932 unsigned int <hash> (const char *str, size_t len). */
933 static void output_hash_function(struct Output *t)
935 /* output the function's head */
936 if (OPTS(CPLUSPLUS))
937 printf("inline ");
938 else if (OPTS(KRC) || OPTS(C) || OPTS(ANSIC))
939 printf(
940 "#ifdef __GNUC__\n"
941 "__inline\n"
942 "#else\n"
943 "#ifdef __cplusplus\n"
944 "inline\n"
945 "#endif\n"
946 "#endif\n");
947 if (/* the function does not use the 'str' argument? */
948 (t->key_positions->size == 0)
949 || /* the function uses 'str', but not the 'len' argument? */
950 (!t->hash_includes_len
951 && t->key_positions->positions[0] < t->min_key_len)
952 && t->key_positions->positions[t->key_positions->size - 1] != POS_LASTCHAR)
953 /* pacify lint */
954 printf("/*ARGSUSED*/\n");
955 if (OPTS(KRC) || OPTS(C) || OPTS(ANSIC))
956 printf("static ");
957 printf("unsigned int\n");
958 if (OPTS(CPLUSPLUS))
959 printf("%s::", options->class_name);
960 printf("%s ", options->hash_name);
961 printf(OPTS(KRC) ?
962 "(str, len)\n"
963 " %schar *str;\n"
964 " %ssize_t len;\n" :
965 OPTS(C) ?
966 "(str, len)\n"
967 " %sconst char *str;\n"
968 " %ssize_t len;\n" :
969 OPTS(ANSIC) || OPTS(CPLUSPLUS) ?
970 "(%sconst char *str, %ssize_t len)\n" :
971 "", register_scs, register_scs);
974 * note that when the hash function is called, it has already been verified that
975 * min_key_len <= len <= max_key_len
977 /* output the function's body */
978 printf(
979 "{\n");
980 /* first the asso_values array */
981 if (t->key_positions->size > 0) {
982 s32 columns;
983 s32 field_width;
984 s32 trunc;
985 u32 count;
987 * the values in the asso_values array are all unsigned integers <= MAX_HASH_VALUE +
990 printf(
991 " static %s%s asso_values[] =\n"
992 " {", const_readonly_array, smallest_integral_type(t->max_hash_value + 1));
993 columns = 10;
994 /* calculate maximum number of digits required for MAX_HASH_VALUE + 1 */
995 field_width = 2;
996 trunc = t->max_hash_value + 1;
997 loop {
998 trunc /= 10;
999 if (trunc <= 0)
1000 break;
1001 ++field_width;
1003 count = 0;
1004 loop {
1005 if (count >= t->alpha_size)
1006 break;
1007 if (count > 0)
1008 printf(",");
1009 if ((count % columns) == 0)
1010 printf("\n ");
1011 printf("%*d", field_width, t->asso_values[count]);
1012 ++count;
1014 printf(
1015 "\n"
1016 " };\n");
1018 if (t->key_positions->size == 0) {
1019 /* trivial case: No key positions at all */
1020 printf(
1021 " return %s;\n", t->hash_includes_len ? "len" : "0");
1022 } else {
1023 struct PositionIterator *iter;
1024 s32 key_pos;
1026 * Iterate through the key positions. Remember that Positions::sort() has sorted
1027 * them in decreasing order, with Positions::LASTCHAR coming last.
1029 iter = pos_iterator(t->key_positions, t->max_key_len);
1030 /* get the highest key position */
1031 key_pos = positer_next(iter);
1032 if (key_pos == POS_LASTCHAR || key_pos < t->min_key_len) {
1034 * We can perform additional optimizations here: Write it out as a single
1035 * expression. Note that the values are added as 'int's even though the
1036 * asso_values array may contain 'unsigned char's or 'unsigned short's.
1038 printf(
1039 " return %s", t->hash_includes_len ? "len + " : "");
1040 if (t->key_positions->size == 2
1041 && t->key_positions->positions[0] == 0
1042 && t->key_positions->positions[1] == POS_LASTCHAR) {
1043 /* optimize special case of "-k 1,$" */
1044 output_asso_values_ref(t, POS_LASTCHAR);
1045 printf(" + ");
1046 output_asso_values_ref(t, 0);
1047 } else {
1048 loop {
1049 if (key_pos == POS_LASTCHAR)
1050 break;
1051 output_asso_values_ref(t, key_pos);
1052 key_pos = positer_next(iter);
1053 if (key_pos != POSITER_EOS)
1054 printf(" + ");
1055 else
1056 break;
1058 if (key_pos == POS_LASTCHAR)
1059 output_asso_values_ref(t, POS_LASTCHAR);
1061 printf(";\n");
1062 } else {
1063 u8 *fallthrough_marker;
1064 /* we've got to use the correct, but brute force, technique */
1066 * pseudo-statement or comment that avoids a compiler warning or lint
1067 * warning
1069 fallthrough_marker =
1070 "#if defined __cplusplus && (__cplusplus >= 201703L || (__cplusplus >= 201103L && defined __clang_major__ && defined __clang_minor__ && __clang_major__ + (__clang_minor__ >= 9) > 3))\n"
1071 " [[fallthrough]];\n"
1072 "#elif defined __GNUC__ && __GNUC__ >= 7\n"
1073 " __attribute__ ((__fallthrough__));\n"
1074 "#endif\n"
1075 " /*FALLTHROUGH*/\n";
1077 * it doesn't really matter whether hval is an 'int' or 'unsigned int', but
1078 * 'unsigned int' gives fewer warnings
1080 printf(
1081 " %sunsigned int hval = %s;\n\n"
1082 " switch (%s)\n"
1083 " {\n"
1084 " default:\n", register_scs, t->hash_includes_len ? "len" : "0",
1085 t->hash_includes_len ? "hval" : "len");
1086 loop {
1087 if (key_pos == POS_LASTCHAR || key_pos < t->max_key_len)
1088 break;
1089 key_pos = positer_next(iter);
1090 if (key_pos == POSITER_EOS)
1091 break;
1093 if (key_pos != POSITER_EOS && key_pos != POS_LASTCHAR) {
1094 s32 i;
1096 i = key_pos;
1097 loop {
1098 if (i > key_pos)
1099 printf("%s", fallthrough_marker);
1100 loop {
1101 if (i <= key_pos)
1102 break;
1103 printf(" case %d:\n", i);
1104 i--;
1106 printf(" hval += ");
1107 output_asso_values_ref(t, key_pos);
1108 printf(";\n");
1109 key_pos = positer_next(iter);
1110 if (key_pos == POSITER_EOS || key_pos == POS_LASTCHAR)
1111 break;
1113 if (i >= t->min_key_len)
1114 printf("%s", fallthrough_marker);
1115 loop {
1116 if (i < t->min_key_len)
1117 break;
1118 printf(" case %d:\n", i);
1119 i--;
1122 printf(
1123 " break;\n"
1124 " }\n"
1125 " return hval");
1126 if (key_pos == POS_LASTCHAR) {
1127 printf(" + ");
1128 output_asso_values_ref(t, POS_LASTCHAR);
1130 printf (";\n");
1132 positer_del(iter);
1134 printf ("}\n\n");
1135 }/*}}}*/
1136 /*{{{ output_asso_values_ref */
1137 /* Generates a C expression for an asso_values[] reference. */
1138 static void output_asso_values_ref(struct Output *t, s32 pos)
1140 printf("asso_values[");
1142 * Always cast to unsigned char. This is necessary when the alpha_inc is nonzero, and also
1143 * avoids a gcc warning "subscript has type 'char'".
1145 if (OPTS(CPLUSPLUS)) {
1147 * In C++, a C style cast may lead to a 'warning: use of old-style cast'.
1148 * Therefore prefer the C++ style cast syntax.
1150 printf("static_cast<unsigned char>(");
1151 output_asso_values_index(t, pos);
1152 printf(")");
1153 } else {
1154 printf("(unsigned char)");
1155 output_asso_values_index(t, pos);
1157 printf("]");
1158 }/*}}}*/
1159 /*{{{ output_asso_values_index */
1160 /* generates a C expression for an asso_values[] index */
1161 static void output_asso_values_index(struct Output *t, s32 pos)
1163 if (pos == POS_LASTCHAR)
1164 printf("str[len - 1]");
1165 else {
1166 printf("str[%d]", pos);
1167 if (t->alpha_inc[pos])
1168 printf("+%u", t->alpha_inc[pos]);
1170 }/*}}}*/
1171 /*{{{ output_lookup_pools */
1172 /* generate all pools needed for the lookup function */
1173 static void output_lookup_pools(struct Output *t)
1175 if (OPTS(SWITCH)) {
1176 if (OPTS(TYPE) || (OPTS(DUP) && t->total_duplicates > 0))
1177 output_string_pool(t);
1178 } else
1179 output_string_pool(t);
1180 }/*}}}*/
1181 /*{{{ output_string_pool */
1183 * Prints out the string pool, containing the strings of the keyword table.
1184 * Only called if option[SHAREDLIB]
1186 static void output_string_pool(struct Output *t)
1188 u8 *indent;
1189 s32 index;
1190 struct Keyword_List *tmp;
1192 indent = OPTS(TYPE) || OPTS(GLOBAL) ? "" : " ";
1194 printf(
1195 "%sstruct %s_t\n"
1196 "%s {\n", indent, options->stringpool_name, indent);
1197 tmp = t->head;
1198 index = 0;
1199 loop {
1200 struct Keyword *kw;
1202 if (tmp == 0)
1203 break;
1204 kw = tmp->kw;
1206 * If generating a switch statement, and there is no user defined type, we generate
1207 * non-duplicates directly in the code. Only duplicates go into the table.
1209 if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0)
1210 continue;
1211 if (!OPTS(SWITCH) && !OPTS(DUP))
1212 index = kw->hash_value;
1213 printf("%s char %s_str%d[sizeof(", indent, options->stringpool_name, index);
1214 output_string(kw->allchars, kw->allchars_length);
1215 printf(")];\n");
1216 /* deal with duplicates specially */
1217 if (kw->duplicate_link) {/* implies option[DUP] */
1218 struct Keyword *links;
1220 links = kw->duplicate_link;
1221 loop {
1222 if (links == 0)
1223 break;
1224 if (!(links->allchars_length == kw->allchars_length
1225 && memcmp(links->allchars, kw->allchars,
1226 kw->allchars_length) == 0)) {
1227 ++index;
1228 printf("%s char %s_str%d[sizeof(", indent,
1229 options->stringpool_name, index);
1230 output_string(links->allchars, links->allchars_length);
1231 printf(")];\n");
1233 links = links->duplicate_link;
1236 ++index;
1237 tmp = tmp->next;
1239 printf(
1240 "%s };\n", indent);
1241 printf(
1242 "%sstatic %sstruct %s_t %s_contents =\n"
1243 "%s {\n", indent, const_readonly_array, options->stringpool_name, options->stringpool_name,
1244 indent);
1245 tmp = t->head;
1246 index = 0;
1247 loop {
1248 struct Keyword *kw;
1250 if (tmp == 0)
1251 break;
1252 kw = tmp->kw;
1254 * If generating a switch statement, and there is no user defined type, we generate
1255 * non-duplicates directly in the code. Only duplicates go into the table.
1257 if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0)
1258 continue;
1259 if (index > 0)
1260 printf(",\n");
1262 if (!OPTS(SWITCH) && !OPTS(DUP))
1263 index = kw->hash_value;
1264 printf(
1265 "%s ", indent);
1266 output_string(kw->allchars, kw->allchars_length);
1267 /* deal with duplicates specially */
1268 if (kw->duplicate_link != 0) {/* implies option[DUP] */
1269 struct Keyword *links;
1271 links = kw->duplicate_link;
1272 loop {
1273 if (links == 0)
1274 break;
1275 if (!(links->allchars_length == kw->allchars_length
1276 && memcmp(links->allchars, kw->allchars,
1277 kw->allchars_length) == 0)) {
1278 ++index;
1279 printf(",\n");
1280 printf(
1281 "%s ", indent);
1282 output_string(links->allchars, links->allchars_length);
1284 links = links->duplicate_link;
1287 ++index;
1288 tmp = tmp->next;
1290 if (index > 0)
1291 printf("\n");
1292 printf(
1293 "%s };\n", indent);
1294 printf(
1295 "%s#define %s ((%schar *) &%s_contents)\n", indent, options->stringpool_name, const_always,
1296 options->stringpool_name);
1297 if (OPTS(GLOBAL))
1298 printf(
1299 "\n");
1300 }/*}}}*/
1301 /*{{{ output_lookup_tables */
1302 /* generate all the tables needed for the lookup function */
1303 static void output_lookup_tables(struct Output *t)
1305 if (OPTS(SWITCH)) {
1306 /* use the switch in place of lookup table */
1307 if (OPTS(LENTABLE) && (OPTS(DUP) && t->total_duplicates > 0))
1308 output_keylength_table(t);
1309 if (OPTS(TYPE) || (OPTS(DUP) && t->total_duplicates > 0))
1310 output_keyword_table(t);
1311 } else {
1312 /* use the lookup table, in place of switch */
1313 if (OPTS(LENTABLE))
1314 output_keylength_table(t);
1315 output_keyword_table(t);
1316 output_lookup_array(t);
1318 }/*}}}*/
1319 /*{{{ output_keylength_table */
1321 * Prints out a table of keyword lengths, for use with the comparison code in generated function
1322 * 'in_word_set'. Only called if option[LENTABLE].
1324 static void output_keylength_table(struct Output *t)
1326 s32 columns;
1327 u8 *indent;
1328 s32 index;
1329 s32 column;
1330 struct Keyword_List *tmp;
1332 columns = 14;
1333 indent = OPTS(GLOBAL) ? "" : " ";
1335 printf(
1336 "%sstatic %s%s %s[] =\n"
1337 "%s {", indent, const_readonly_array, smallest_integral_type(t->max_key_len),
1338 options->lengthtable_name, indent);
1339 column = 0;
1340 tmp = t->head;
1341 index = 0;
1342 loop {
1343 struct Keyword *kw;
1345 if (tmp == 0)
1346 break;
1347 kw = tmp->kw;
1349 * If generating a switch statement, and there is no user defined type, we generate
1350 * non-duplicates directly in the code. Only duplicates go into the table.
1352 if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0)
1353 continue;
1354 if (index < kw->hash_value && !OPTS(SWITCH) && !OPTS(DUP)) {
1355 /* some blank entries */
1356 loop {
1357 if (index >= kw->hash_value)
1358 break;
1359 if (index > 0)
1360 printf(",");
1361 if ((column % columns) == 0)
1362 printf(
1363 "\n%s ", indent);
1364 ++column;
1365 printf("%3d", 0);
1366 ++index;
1369 if (index > 0)
1370 printf(",");
1371 if ((column % columns) == 0)
1372 printf(
1373 "\n%s ", indent);
1374 ++column;
1375 printf("%3d", kw->allchars_length);
1376 ++index;
1377 /* deal with duplicates specially */
1378 if (kw->duplicate_link != 0) {
1379 struct Keyword *links;
1381 links = kw->duplicate_link;
1382 loop {
1383 if (links == 0)
1384 break;
1385 printf(",");
1386 if ((column % columns) == 0)
1387 printf(
1388 "\n%s ", indent);
1389 ++column;
1390 printf("%3d", links->allchars_length);
1391 ++index;
1392 links = links->duplicate_link;
1395 tmp = tmp->next;
1397 printf(
1398 "\n%s };\n", indent);
1399 if (OPTS(GLOBAL))
1400 printf(
1401 "\n");
1402 }/*}}}*/
1403 /*{{{ output_keyword_table */
1404 /* prints out the array containing the keywords for the hash function */
1405 static void output_keyword_table(struct Output *t)
1407 u8 *indent;
1408 s32 index;
1409 struct Keyword_List *tmp;
1411 indent = OPTS(GLOBAL) ? "" : " ";
1412 printf(
1413 "%sstatic ", indent);
1414 output_const_type(const_readonly_array, t->wordlist_eltype);
1415 printf("%s[] =\n"
1416 "%s {\n", options->wordlist_name, indent);
1417 /* generate an array of reserved words at appropriate locations */
1418 tmp = t->head;
1419 index = 0;
1420 loop {
1421 struct Keyword *kw;
1423 if (tmp == 0)
1424 break;
1425 kw = tmp->kw;
1427 * If generating a switch statement, and there is no user defined type, we generate
1428 * non-duplicates directly in the code. Only duplicates go into the table.
1430 if (OPTS(SWITCH) && !OPTS(TYPE) && kw->duplicate_link == 0)
1431 continue;
1432 if (index > 0)
1433 printf(",\n");
1434 if (index < kw->hash_value && !OPTS(SWITCH) && !OPTS(DUP)) {
1435 /* some blank entries */
1436 output_keyword_blank_entries(kw->hash_value - index, indent);
1437 printf(",\n");
1438 index = kw->hash_value;
1440 kw->final_index = index;
1441 output_keyword_entry(kw, index, indent, false);
1442 /* deal with duplicates specially */
1443 if (kw->duplicate_link != 0) { /* implies option[DUP] */
1444 struct Keyword *links;
1446 links = kw->duplicate_link;
1447 loop {
1448 s32 stringpool_index;
1450 if (links == 0)
1451 break;
1452 ++index;
1453 links->final_index = index;
1454 printf(",\n");
1455 stringpool_index =
1456 (links->allchars_length == kw->allchars_length
1457 && memcmp(links->allchars, kw->allchars,
1458 kw->allchars_length) == 0
1459 ? kw->final_index : links->final_index);
1460 output_keyword_entry(links, stringpool_index, indent, true);
1461 links = links->duplicate_link;
1464 ++index;
1465 tmp = tmp->next;
1467 if (index > 0)
1468 printf("\n");
1469 printf(
1470 "%s };\n\n", indent);
1471 }/*}}}*/
1472 /*{{{ output_lookup_array */
1474 * generates the large, sparse table that maps hash values into the smaller, contiguous range of the
1475 * keyword table
1477 static void output_lookup_array(struct Output *t)
1479 s32 DEFAULT_VALUE;
1480 struct Duplicate_Entry *duplicates;
1481 s32 *lookup_array;
1482 s32 lookup_array_size;
1483 struct Duplicate_Entry *dup_ptr;
1484 s32 *lookup_ptr;
1485 struct Keyword_List *tmp;
1486 s32 min;
1487 s32 max;
1488 u8 *indent;
1489 s32 field_width;
1490 s32 columns;
1491 s32 column;
1492 s32 i;
1493 if (!OPTS(DUP))
1494 return;
1496 DEFAULT_VALUE = -1;
1498 duplicates = calloc(t->total_duplicates, sizeof(*duplicates));
1499 lookup_array = calloc(t->max_hash_value + 1 + 2 * t->total_duplicates,
1500 sizeof(*lookup_array));
1501 lookup_array_size = t->max_hash_value + 1;
1502 dup_ptr = &duplicates[0];
1503 lookup_ptr = &lookup_array[t->max_hash_value + 1 + 2 * t->total_duplicates];
1505 loop {
1506 if (lookup_ptr <= lookup_array)
1507 break;
1508 *--lookup_ptr = DEFAULT_VALUE;
1510 /* now dup_ptr = &duplicates[0] and lookup_ptr = &lookup_array[0] */
1511 tmp = t->head;
1512 loop {
1513 s32 hash_value;
1515 if (tmp == 0)
1516 break;
1517 hash_value = tmp->kw->hash_value;
1518 lookup_array[hash_value] = tmp->kw->final_index;
1519 if (OPTS(DEBUG))
1520 fprintf(stderr, "keyword = %.*s, index = %d\n", tmp->kw->allchars_length, tmp->kw->allchars, tmp->kw->final_index);
1521 if (tmp->kw->duplicate_link != 0) {
1522 struct Keyword *ptr;
1524 /* start a duplicate entry */
1525 dup_ptr->hash_value = hash_value;
1526 dup_ptr->index = tmp->kw->final_index;
1527 dup_ptr->count = 1;
1529 ptr = tmp->kw->duplicate_link;
1530 loop {
1531 if (ptr != 0)
1532 break;
1533 ++(dup_ptr->count);
1534 if (OPTS(DEBUG))
1535 fprintf(stderr, "static linked keyword = %.*s, index = %d\n", ptr->allchars_length, ptr->allchars, ptr->final_index);
1536 ptr = ptr->duplicate_link;
1538 ++dup_ptr;
1540 tmp = tmp->next;
1542 loop {
1543 s32 i;
1545 if (dup_ptr <= duplicates)
1546 break;
1547 dup_ptr--;
1548 if (OPTS(DEBUG))
1549 fprintf(stderr, "dup_ptr[%lu]: hash_value = %d, index = %d, count = %d\n", (unsigned long)(dup_ptr - duplicates), dup_ptr->hash_value, dup_ptr->index, dup_ptr->count);
1551 * start searching for available space towards the right part of the lookup
1552 * array
1554 i = dup_ptr->hash_value;
1555 loop {
1556 if (i >= lookup_array_size - 1)
1557 break;
1558 if (lookup_array[i] == DEFAULT_VALUE && lookup_array[i + 1]
1559 == DEFAULT_VALUE)
1560 goto found_i;
1561 ++i;
1563 /* if we didn't find it to the right look to the left instead... */
1564 i = dup_ptr->hash_value - 1;
1565 loop {
1566 if (i < 0)
1567 break;
1568 if (lookup_array[i] == DEFAULT_VALUE && lookup_array[i + 1]
1569 == DEFAULT_VALUE)
1570 goto found_i;
1571 i--;
1573 /* append to the end of lookup_array */
1574 i = lookup_array_size;
1575 lookup_array_size += 2;
1576 found_i:
1578 * Put in an indirection from dup_ptr->_hash_value to i.
1579 * At i and i+1 store dup_ptr->_final_index and dup_ptr->count.
1581 lookup_array[dup_ptr->hash_value] = - 1 - t->total_keys - i;
1582 lookup_array[i] = - t->total_keys + dup_ptr->index;
1583 lookup_array[i + 1] = - dup_ptr->count;
1584 /* All these three values are <= -2, distinct from DEFAULT_VALUE */
1586 /* the values of the lookup array are now known */
1587 min = S32_MAX;
1588 max = S32_MIN;
1589 lookup_ptr = lookup_array + lookup_array_size;
1590 loop {
1591 s32 val;
1593 if (lookup_ptr <= lookup_array)
1594 break;
1595 val = *--lookup_ptr;
1596 if (min > val)
1597 min = val;
1598 if (max < val)
1599 max = val;
1601 indent = OPTS(GLOBAL) ? "" : " ";
1602 printf(
1603 "%sstatic %s%s lookup[] =\n"
1604 "%s {", indent, const_readonly_array, smallest_integral_type_2(min, max), indent);
1605 /* calculate maximum number of digits required for MIN..MAX */
1607 s32 trunc;
1609 field_width = 2;
1610 trunc = max;
1611 loop {
1612 trunc /= 10;
1613 if (trunc <= 0)
1614 break;
1615 ++field_width;
1618 if (min < 0) {
1619 s32 neg_field_width;
1620 s32 trunc;
1622 neg_field_width = 2;
1623 trunc = -min;
1624 loop {
1625 trunc /= 10;
1626 if (trunc <= 0)
1627 break;
1628 ++neg_field_width;
1630 ++neg_field_width; /* account for the minus sign */
1631 if (field_width < neg_field_width)
1632 field_width = neg_field_width;
1634 columns = 42 / field_width;
1635 column = 0;
1636 i = 0;
1637 loop {
1638 if (i >= lookup_array_size)
1639 break;
1640 if (i > 0)
1641 printf(",");
1642 if ((column % columns) == 0)
1643 printf("\n%s ", indent);
1644 ++column;
1645 printf("%*d", field_width, lookup_array[i]);
1646 ++i;
1648 printf(
1649 "\n%s };\n\n", indent);
1650 free(duplicates);
1651 free(lookup_array);
1652 }/*}}}*/
1653 /*{{{ output_lookup_function */
1654 /* generates C code for the lookup function */
1655 static void output_lookup_function(struct Output *t)
1657 /* output the function's head */
1659 * We don't declare the lookup function 'static' because we cannot make assumptions about
1660 * the compilation units of the user.
1661 * Since we don't make it 'static', it makes no sense to declare it 'inline', because
1662 * non-static inline functions must not reference static functions or variables, see ISO C
1663 * 99 section 6.7.4.(3).
1665 printf(
1666 "%s%s\n", const_for_struct, t->return_type);
1667 if (OPTS(CPLUSPLUS))
1668 printf(
1669 "%s::", options->class_name);
1670 printf("%s ", options->function_name);
1671 printf(
1672 OPTS(KRC) ? "(str, len)\n"
1673 " %schar *str;\n"
1674 " %ssize_t len;\n" :
1675 OPTS(C) ? "(str, len)\n"
1676 " %sconst char *str;\n"
1677 " %ssize_t len;\n" :
1678 OPTS(ANSIC) || OPTS(CPLUSPLUS) ? "(%sconst char *str, %ssize_t len)\n" :
1679 "", register_scs, register_scs);
1681 /* output the function's body */
1682 printf(
1683 "{\n");
1684 if (OPTS(ENUM) && !OPTS(GLOBAL))
1685 output_constants_enum(t, " ");
1686 if (OPTS(SHAREDLIB) && !(OPTS(GLOBAL) || OPTS(TYPE)))
1687 output_lookup_pools(t);
1688 if (!OPTS(GLOBAL))
1689 output_lookup_tables(t);
1690 if (OPTS(LENTABLE))
1691 output_lookup_function_body(t, output_comparison_memcmp);
1692 else {
1693 if (OPTS(COMP))
1694 output_lookup_function_body(t, output_comparison_strncmp);
1695 else
1696 output_lookup_function_body(t, output_comparison_strcmp);
1698 printf(
1699 "}\n");
1700 }/*}}}*/
1701 /*{{{ output_lookup_function_body */
1702 static void output_lookup_function_body(struct Output *t,
1703 void (*output_comparison)(u8 *expr1, u8 *expr2))
1705 printf(
1706 " if (len <= %sMAX_WORD_LENGTH && len >= %sMIN_WORD_LENGTH)\n"
1707 " {\n"
1708 " %sunsigned int key = %s (str, len);\n\n", options->constants_prefix,
1709 options->constants_prefix, register_scs, options->hash_name);
1710 if (OPTS(SWITCH)) {
1711 s32 switch_size;
1712 s32 num_switches;
1714 switch_size = output_num_hash_values(t);
1715 num_switches = options->total_switches;
1716 if (num_switches > switch_size)
1717 num_switches = switch_size;
1718 printf(
1719 " if (key <= %sMAX_HASH_VALUE", options->constants_prefix);
1720 if (t->min_hash_value > 0)
1721 printf(
1722 " && key >= %sMIN_HASH_VALUE", options->constants_prefix);
1723 printf (
1724 ")\n"
1725 " {\n");
1726 if (OPTS(DUP) && t->total_duplicates > 0) {
1727 if (OPTS(LENTABLE))
1728 printf(
1729 " %s%s%s *lengthptr;\n", register_scs, const_always, smallest_integral_type(
1730 t->max_key_len));
1731 printf(
1732 " %s", register_scs);
1733 output_const_type(const_readonly_array, t->wordlist_eltype);
1734 printf("*wordptr;\n");
1735 printf(
1736 " %s", register_scs);
1737 output_const_type(const_readonly_array, t->wordlist_eltype);
1738 printf("*wordendptr;\n");
1740 if (OPTS(TYPE)) {
1741 printf(
1742 " %s", register_scs);
1743 output_const_type(const_readonly_array, t->struct_tag);
1744 printf("*resword;\n\n");
1745 } else
1746 printf(
1747 " %s%sresword;\n\n", register_scs, t->struct_tag);
1748 output_switches(t->head, num_switches, switch_size, t->min_hash_value,
1749 t->max_hash_value, 10);
1750 printf(
1751 " return 0;\n");
1752 if (OPTS(DUP) && t->total_duplicates > 0) {
1753 s32 indent;
1755 indent = 8;
1756 printf(
1757 "%*smulticompare:\n"
1758 "%*s while (wordptr < wordendptr)\n"
1759 "%*s {\n", indent, "", indent, "", indent, "");
1760 if (OPTS(LENTABLE)) {
1761 printf(
1762 "%*s if (len == *lengthptr)\n"
1763 "%*s {\n", indent, "", indent, "");
1764 indent += 4;
1766 printf(
1767 "%*s %s%schar *s = ", indent, "", register_scs, const_always);
1768 if (OPTS(TYPE))
1769 printf("wordptr->%s", options->slot_name);
1770 else
1771 printf("*wordptr");
1772 if (OPTS(SHAREDLIB))
1773 printf(" + %s", options->stringpool_name);
1774 printf(";\n\n"
1775 "%*s if (", indent, "");
1776 output_comparison("str", "s");
1777 printf(")\n"
1778 "%*s return %s;\n", indent, "", OPTS(TYPE) ? "wordptr" : "s");
1779 if (OPTS(LENTABLE)) {
1780 indent -= 4;
1781 printf(
1782 "%*s }\n", indent, "");
1784 if (OPTS(LENTABLE))
1785 printf(
1786 "%*s lengthptr++;\n", indent, "");
1787 printf(
1788 "%*s wordptr++;\n"
1789 "%*s }\n"
1790 "%*s return 0;\n", indent, "", indent, "", indent, "");
1792 printf(
1793 " compare:\n");
1794 if (OPTS(TYPE)) {
1795 printf(
1796 " {\n"
1797 " %s%schar *s = resword->%s", register_scs, const_always, options->slot_name);
1798 if (OPTS(SHAREDLIB))
1799 printf(" + %s", options->stringpool_name);
1800 printf(";\n\n"
1801 " if (");
1802 output_comparison("str", "s");
1803 printf(
1804 ")\n"
1805 " return resword;\n"
1806 " }\n");
1807 } else {
1808 output_comparison("str", "resword");
1809 printf(
1810 ")\n"
1811 " return resword;\n");
1813 printf(
1814 " }\n");
1815 } else {
1816 printf(
1817 " if (key <= %sMAX_HASH_VALUE)\n", options->constants_prefix);
1818 if (OPTS(DUP)) {
1819 s32 indent;
1821 indent = 8;
1822 printf(
1823 "%*s{\n"
1824 "%*s %sint index = lookup[key];\n\n"
1825 "%*s if (index >= 0)\n", indent, "", indent, "", register_scs, indent, "");
1826 if (OPTS(LENTABLE)) {
1827 printf(
1828 "%*s {\n"
1829 "%*s if (len == %s[index])\n", indent, "", indent, "", options->lengthtable_name);
1830 indent += 4;
1832 printf(
1833 "%*s {\n"
1834 "%*s %s%schar *s = %s[index]", indent, "", indent, "", register_scs, const_always,
1835 options->wordlist_name);
1836 if (OPTS(TYPE))
1837 printf(".%s", options->slot_name);
1838 if (OPTS(SHAREDLIB))
1839 printf (" + %s", options->stringpool_name);
1840 printf(";\n\n"
1841 "%*s if (", indent, "");
1842 output_comparison("str", "s");
1843 printf (")\n"
1844 "%*s return ", indent, "");
1845 if (OPTS(TYPE))
1846 printf("&%s[index]", options->wordlist_name);
1847 else
1848 printf("s");
1849 printf(";\n"
1850 "%*s }\n", indent, "");
1851 if (OPTS(LENTABLE)) {
1852 indent -= 4;
1853 printf(
1854 "%*s }\n", indent, "");
1856 if (t->total_duplicates > 0) {
1857 printf(
1858 "%*s else if (index < -%sTOTAL_KEYWORDS)\n"
1859 "%*s {\n"
1860 "%*s %sint offset = - 1 - %sTOTAL_KEYWORDS - index;\n", indent, "", options->constants_prefix,
1861 indent, "", indent, "", register_scs,
1862 options->constants_prefix);
1863 if (OPTS(LENTABLE))
1864 printf(
1865 "%*s %s%s%s *lengthptr = &%s[%sTOTAL_KEYWORDS + lookup[offset]];\n", indent, "",
1866 register_scs, const_always,
1867 smallest_integral_type(t->max_key_len),
1868 options->lengthtable_name,
1869 options->constants_prefix);
1870 printf(
1871 "%*s %s", indent, "", register_scs);
1872 output_const_type(const_readonly_array, t->wordlist_eltype);
1873 printf("*wordptr = &%s[%sTOTAL_KEYWORDS + lookup[offset]];\n",
1874 options->wordlist_name, options->constants_prefix);
1875 printf(
1876 "%*s %s", indent, "", register_scs);
1877 output_const_type(const_readonly_array, t->wordlist_eltype);
1878 printf("*wordendptr = wordptr + -lookup[offset + 1];\n\n");
1879 printf(
1880 "%*s while (wordptr < wordendptr)\n"
1881 "%*s {\n", indent, "", indent, "");
1882 if (OPTS(LENTABLE)) {
1883 printf(
1884 "%*s if (len == *lengthptr)\n"
1885 "%*s {\n", indent, "", indent, "");
1886 indent += 4;
1888 printf(
1889 "%*s %s%schar *s = ", indent, "", register_scs, const_always);
1890 if (OPTS(TYPE))
1891 printf("wordptr->%s", options->slot_name);
1892 else
1893 printf("*wordptr");
1894 if (OPTS(SHAREDLIB))
1895 printf(" + %s", options->stringpool_name);
1896 printf (";\n\n"
1897 "%*s if (", indent, "");
1898 output_comparison("str", "s");
1899 printf (")\n"
1900 "%*s return %s;\n", indent, "", OPTS(TYPE) ? "wordptr" : "s");
1901 if (OPTS(LENTABLE)) {
1902 indent -= 4;
1903 printf(
1904 "%*s }\n", indent, "");
1906 if (OPTS(LENTABLE))
1907 printf(
1908 "%*s lengthptr++;\n", indent, "");
1909 printf(
1910 "%*s wordptr++;\n"
1911 "%*s }\n"
1912 "%*s }\n", indent, "", indent, "", indent, "");
1914 printf(
1915 "%*s}\n", indent, "");
1916 } else {
1917 s32 indent;
1919 indent = 8;
1920 if (OPTS(LENTABLE)) {
1921 printf(
1922 "%*sif (len == %s[key])\n", indent, "", options->lengthtable_name);
1923 indent += 2;
1925 if (OPTS(SHAREDLIB)) {
1926 if (!OPTS(LENTABLE)) {
1927 printf(
1928 "%*s{\n"
1929 "%*s %sint o = %s[key]", indent, "", indent, "", register_scs,
1930 options->wordlist_name);
1931 if (OPTS(TYPE))
1932 printf(".%s", options->slot_name);
1933 printf (";\n"
1934 "%*s if (o >= 0)\n"
1935 "%*s {\n", indent, "", indent, "");
1936 indent += 4;
1937 printf(
1938 "%*s %s%schar *s = o", indent, "", register_scs, const_always);
1939 } else {
1941 * no need for the (o >= 0) test, because the
1942 * (len == lengthtable[key]) test already guarantees that
1943 * key points to nonempty table entry
1945 printf (
1946 "%*s{\n"
1947 "%*s %s%schar *s = %s[key]", indent, "", indent, "", register_scs, const_always,
1948 options->wordlist_name);
1949 if (OPTS(TYPE))
1950 printf(".%s", options->slot_name);
1952 printf (" + %s", options->stringpool_name);
1953 } else {
1954 printf(
1955 "%*s{\n"
1956 "%*s %s%schar *s = %s[key]", indent, "", indent, "", register_scs, const_always,
1957 options->wordlist_name);
1958 if (OPTS(TYPE))
1959 printf(".%s", options->slot_name);
1961 printf (";\n\n"
1962 "%*s if (", indent, "");
1963 if (!OPTS(SHAREDLIB) && OPTS(NULLSTRINGS))
1964 printf ("s && ");
1965 output_comparison("str", "s");
1966 printf (")\n"
1967 "%*s return ", indent, "");
1968 if (OPTS(TYPE))
1969 printf("&%s[key]", options->wordlist_name);
1970 else
1971 printf("s");
1972 printf(";\n");
1973 if (OPTS(SHAREDLIB) && !OPTS(LENTABLE)) {
1974 indent -= 4;
1975 printf(
1976 "%*s }\n", indent, "");
1978 printf(
1979 "%*s}\n", indent, "");
1982 printf(
1983 " }\n"
1984 " return 0;\n");
1985 }/*}}}*/
1986 /*{{{ output_num_hash_values */
1987 /* Returns the number of different hash values. */
1988 static s32 output_num_hash_values(struct Output *t)
1990 s32 count;
1991 struct Keyword_List *tmp;
1993 * since the list is already sorted by hash value and doesn't contain duplicates, we can
1994 * simply count the number of keywords on the list
1996 count = 0;
1997 tmp = t->head;
1998 loop {
1999 if (tmp == 0)
2000 break;
2001 ++count;
2002 tmp = tmp->next;
2004 return count;
2005 }/*}}}*/
2006 /*------------------------------------------------------------------------------------------------*/
2007 #undef USE_DOWNCASE_TABLE
2008 /*------------------------------------------------------------------------------------------------*/
2009 #define EPILOG
2010 #include "namespace/globals.h"
2011 #include "namespace/options.h"
2012 #include "namespace/output.h"
2013 #include "namespace/output.c"
2014 #include "namespace/keyword.h"
2015 #include "namespace/keyword_list.h"
2016 #include "namespace/positions.h"
2017 #undef EPILOG
2018 /*------------------------------------------------------------------------------------------------*/
2019 #endif