gen-strtab.awk: Work around IRIX 6.2 nawk bug.
[dxcommon.git] / scripts / gen-options.awk
blobefd76fe6f5d521eb888585aacb93e6b13336a094
1 #!/bin/awk -f
3 # Generate definitions helpful when using getopt_long from an options
4 # specification file.
6 # Copyright © 2021, 2023-2024 Nick Bowler
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 # The options specification file is processed line by line. Any line
22 # beginning with a - character introduces a new option definition. Each
23 # option definition specifies any or all of a short option name, a long
24 # option name, an argument specification, and an action specification.
26 # Only the long option name is mandatory. It is not possible to define
27 # short options without a corresponding long option.
29 # The optional short option name is first, and consists of a hyphen (which
30 # must be the first character on the line) followed by the one character
31 # short option name, followed by a comma.
33 # The long option name is next on the line, which consists of two hyphens
34 # followed by the desired option name. If the short option name was omitted,
35 # then the first hyphen of the long option name must be the first character
36 # on the line.
38 # The argument specification is next, consisting of an equals sign followed by
39 # the argument name. The argument name can be any sequence of non-whitespace
40 # characters and is only relevant for --help text.
42 # If the argument specification is surrounded by square brackets, this
43 # indicates an optional argument. If the argument specification is omitted
44 # completely, this option has no argument. Otherwise, the option has a
45 # mandatory argument.
47 # Finally, the optional action specification defines how the "flag" and
48 # "val" members are set in the option structure for this option. An action
49 # specification may only be provided for options with no short name.
51 # If the action specification is omitted, then flag will be set to a null
52 # pointer and val is set to the short option character, if any, otherwise the
53 # unique enumeration constant LOPT_xxx for this option (described below).
55 # The action specification can be of the form (val) or (flag, val), where flag
56 # and val are C expressions suitable for use in an initializer for objects
57 # with static storage duration. Neither flag nor val may contain commas or
58 # whitespace. In the first form, the option's flag is set to a null pointer.
60 # Any amount of whitespace may follow the short option name, the argument
61 # specification, the action specification, or the comma within an action
62 # specification. Whitespace is not permitted between a long option name
63 # and an argument specification.
65 # Examples of option specifications:
67 # -h, --help
68 # --do-nothing (0)
69 # -o, --output=FILE
70 # --pad[=VAL]
71 # --parse-only (&parse_only, 1)
73 # Each option is assigned an enumeration constant of the form LOPT_xxx,
74 # where xxx is the long option name with all letters in uppercase and
75 # all non-alphanumeric characters replaced with underscores. The value
76 # of the constants is unspecified, except that they will be unique across
77 # all defined options and distinct from the integer value of any short
78 # option character.
80 # The object-like macro SOPT_STRING expands to a string literal suitable
81 # for use as the optstring argument to getopt et al.
83 # The object-like macro LOPTS_INITIALIZER expands to a comma-separated
84 # sequence of struct option initializers, suitable for use in a declaration
85 # of an array of struct option elements with static storage duration. The
86 # all-zero terminating element required by getopt_long must be added by the
87 # user. For example:
89 # static const struct option lopts[] = { LOPTS_INITIALIZER, {0} };
91 # If none of the options have action specifications, then an alternate
92 # set of macros is also defined, which encode the struct option array
93 # into a more compact format that can be used to generate the full
94 # 'struct option' array at runtime:
96 # * the object-like macro LOPT_PACK_BITS expands to an integer constant
97 # expression, suitable for use in #if directives, that specifies the
98 # minimum number of bits required by the encoding. LOPT_PACK_BITS2
99 # is the same, but rounded up to the next power of two greater than
100 # or equal to 8.
102 # * the object-like macro LOPTS_PACKED_INITIALIZER expands to a
103 # comma-separated sequence of integer constant expressions, suitable
104 # for initializing an array of integers. All values are less than
105 # 2^LOPT_PACK_BITS.
107 # * the function-like macro LOPT_UNPACK(opt, x), where opt is an
108 # lvalue of type 'struct option', and x is one of the array
109 # elements initialized by LOPTS_PACKED_INITIALIZER. This expands
110 # the encoded value and sets the name, has_arg and val members of
111 # opt appropriately. The caller should ensure that the flag member
112 # is set to zero.
114 # The help text for an individual struct option element may be obtained by
115 # the function
117 # struct lopt_help { const char *desc, *arg; }
118 # *lopt_get_help(const struct option *opt, struct lopt_help *out);
120 # The returned desc and arg pointers point to the help text and argument name
121 # for the option, respectively, as written in the options specification file.
# Emit the "generated file" banner.  This lives in an END rule (rather than
# BEGIN) so that FILENAME can be mentioned when it is set.
END {
	print "/*"
	if (!FILENAME) {
		print " * Automatically generated by gen-options.awk"
	} else {
		print " * Automatically generated by gen-options.awk from " FILENAME
	}
	print " * Do not edit."
	print " */"
}
BEGIN {
	# Probe how this awk treats "\\\\" as a substitution replacement.  If it
	# yields exactly one backslash, that is the spelling to use; otherwise
	# "\\" is.  Either way, bs used as a replacement emits one backslash.
	bs = "x"
	sub(/x/, "\\\\", bs)
	if (length(bs) == 1) {
		bs = "\\\\"
	} else {
		bs = "\\"
	}

	# Parser state shared by the rules below.
	err = has_actions = num_options = 0
	lopt = sopt_string = ""
}
# Parse option specifier lines (any line beginning with a hyphen).
#
# On success this records, for the option just parsed:
#   options[num_options++]  - the long option name, in definition order
#   optionspec[lopt]        - "has_arg, flag, val" initializer text
#   optionarg[lopt]         - the argument name, if one was given
# and appends the short option (plus ":" or "::") to sopt_string.  The
# global lopt is left set so following lines attach as help text.
#
# On a malformed line, a diagnostic is written to /dev/stderr, err is set
# and processing stops.
$0 ~ /^-/ {
	work = $0
	arg = lopt = sopt = ""
	has_arg = 0

	# Extract short option name: "-x," and any whitespace after the comma.
	if (work ~ /^-[^-]/) {
		sopt = substr(work, 2, 1)
		sub(/^-.,[ \t]*/, "", work)
	}

	# Extract long option name, which ends at the first "=", "[" or
	# whitespace character (or at end of line).
	if (work ~ /^--/) {
		if (n = match(work, /[= \t[]/)) {
			lopt = substr(work, 3, n-3)
			work = substr(work, n)
		} else {
			lopt = substr(work, 3)
			work = ""
		}
	}

	# Extract argument name: "[=NAME]" is optional, "=NAME" is mandatory.
	if (work ~ /^\[=[^ \t]+\]/ && sub(/\]/, "&", work) == 1) {
		if (n = index(work, "]")) {
			arg = substr(work, 3, n-3)
			work = substr(work, n+1)
		}
		has_arg = 2
	} else if (work ~ /^=/) {
		if (n = match(work, /[ \t]/)) {
			arg = substr(work, 2, n-2)
			work = substr(work, n)
		} else {
			arg = substr(work, 2)
			work = ""
		}
		has_arg = 1
	}

	# Extract action.  Whitespace on either side of what remains is not
	# significant, so trim it; the action must then account for the whole
	# remainder.  Anchoring the match at the end keeps stray text after
	# the closing parenthesis from leaking into the flag/val expressions.
	sub(/^[ \t]*/, "", work)
	sub(/[ \t]+$/, "", work)
	if (!sopt && work ~ /^\([^, \t]+(,[ \t]*[^, \t]+)?\)$/) {
		# packed form is not possible w/ actions
		has_actions = 1

		# The pattern admits at most one comma, so n is 1 or 2.
		n = split(work, a, ",[ \t]*")
		if (n == 2) {
			flag = substr(a[1], 2) ", " substr(a[2], 1, length(a[2])-1)
		} else {
			flag = "0, " substr(a[1], 2, length(a[1])-2)
		}
		work = ""
	} else if (sopt) {
		flag = "0, '" sopt "'"
	} else {
		flag = "0, " to_enum(lopt)
	}

	# Anything left over is a syntax error, as is a missing long option
	# name (the long name is the only mandatory part of a specification).
	if (work != "" || lopt == "") {
		print "invalid option specification:", $0 > "/dev/stderr"
		err = 1
		exit
	}

	if (sopt) {
		# has_arg (0, 1 or 2) selects how many colons follow the letter.
		sopt_string = sopt_string sopt substr("::", 1, has_arg)
	}
	options[num_options++] = lopt
	optionspec[lopt] = has_arg ", " flag
	if (arg) {
		optionarg[lopt] = arg
	}

	next
}
# Lines whose first character is # are comments; skip them.
/^#/ {
	next
}
# Any other non-blank line following an option definition is help text for
# that option.  Leading whitespace is dropped and successive lines are joined
# with newlines.
NF && lopt != "" {
	sub(/^[ \t]*/, "")

	if (lopt in optionhelp) {
		$0 = "\n" $0
	}
	optionhelp[lopt] = optionhelp[lopt] $0
}
# Generate all output once the whole specification has been read.
END {
	# Exit immediately on error
	if (err) {
		exit err
	}

	print "#include <limits.h>\n"
	print "#define SOPT_STRING \"" sopt_string "\""

	# Build the table of long option names, longest first so that shorter
	# names can share storage with the tail of longer ones.
	lopt_strings = ""
	count = bucketsort(sorted_options, options)
	for (i = 0; i < count; i++) {
		lopt_strings = add_to_strtab(lopt_strings, sorted_options[i], offsets)
	}
	gsub("\1", bs"0"bs"\n", lopt_strings)
	print "static const char lopt_strings[] = \"\\\n" lopt_strings "\";"

	# One enumeration constant per option, in definition order.  Values are
	# string table offsets biased past the range of unsigned char.
	print "\nenum {"
	for (i = 0; i < count; i++) {
		opt = options[i]
		if (i+1 == count) {
			sep = ""
		} else {
			sep = ","
		}

		print "\t" to_enum(opt), "= UCHAR_MAX+1 +", offsets[opt] sep
	}
	print "};"
	print "#define lopt_str(x) (lopt_strings + (LOPT_ ## x - UCHAR_MAX - 1))"

	# The packed encoding cannot represent action specifications.
	if (!has_actions) {
		output_packed_macros()
	}

	print "\n#define LOPTS_INITIALIZER \\"
	for (i = 0; i < count; i++) {
		opt = options[i]
		if (i+1 == count) {
			sep = ""
		} else {
			sep = ", \\"
		}

		print "\t/* --" opt, "*/ \\"
		print "\t{ lopt_strings+" offsets[opt] ",", optionspec[opt] " }" sep
	}

	output_help_function()
}
# output_help_function()
#
# Emit the C function lopt_get_help, which generates --help output from the
# options description.  The emitted function has a static string table
# holding every help text and argument name, and a switch which maps an
# option (identified by the offset of its name within lopt_strings) to the
# matching table entries.
#
# The parameters are local variables only; callers pass no arguments.
function output_help_function(strings, count, i)
{
	print "\nstatic struct lopt_help { const char *desc, *arg; }";
	print "*lopt_get_help(const struct option *opt, struct lopt_help *out)";
	print "{";

	# Help texts go into the table first, longest first ...
	count = bucketsort(help_sorted, optionhelp);
	for (i = 0; i < count; i++)
		strings = add_to_strtab(strings, help_sorted[i], help_offsets);

	# ... followed by the argument names, also longest first.
	count = bucketsort(help_sorted, optionarg);
	for (i = 0; i < count; i++)
		strings = add_to_strtab(strings, help_sorted[i], help_arg_offsets);

	# ensure the empty string is in the offsets table
	strings = add_to_strtab(strings, "", help_offsets);

	# Offsets were computed on the raw text, so escaping happens only now:
	# quotes, then embedded newlines, then the "\1" entry separators.
	gsub("\"", bs"\"", strings);
	gsub("\n", bs"n"bs"\n", strings);
	gsub("\1", bs"0"bs"\n", strings);
	print "\tstatic const char help_strings[] = \"\\\n" strings "\";";

	# Translatable strings, inside a block the C compiler never sees
	# (presumably for message extraction tools -- TODO confirm).
	print "#if 0";
	for (i in options) {
		i = options[i];
		if (!(i in optionhelp)) {
			print "\tpgettext(\"" i "\", \"\");";
			continue;
		}

		i = optionhelp[i];
		gsub("\"", bs"\"", i);
		gsub("\n", bs"n\"\n\t \"", i);
		print "\tgettext(\"" i "\");";
	}
	for (i in optionarg)
		print "\tpgettext(\"" i "\", \"" optionarg[i] "\");";
	print "#endif";

	print "\n\tswitch((opt->name - lopt_strings) + UCHAR_MAX + 1) {";

	# Options with help text: set desc (and arg, if any), then return.
	for (i in optionhelp) {
		print "\tcase " to_enum(i) ":";
		print "\t\tout->desc = help_strings+" help_offsets[optionhelp[i]] ";";
		if (i in optionarg)
			print "\t\tout->arg = help_strings+" help_arg_offsets[optionarg[i]] ";";
		print "\t\treturn out;";
	}

	# Options with an argument name but no help text: set arg, then leave
	# the switch so that desc is filled in by the fallback below.
	for (i in optionarg) {
		if (i in optionhelp)
			continue;

		print "\tcase " to_enum(i) ":";
		print "\t\tout->arg = help_strings+" help_arg_offsets[optionarg[i]] ";";
		print "\t\tbreak;";
	}
	print "\t}";

	# The fallback (empty description) is emitted only if at least one
	# option lacks help text.
	for (i in options) {
		if (options[i] in optionhelp)
			continue;

		print "\n\tout->desc = help_strings+" help_offsets[""] ";";
		print "\treturn out;";
		break;
	}

	print "}";
}
# output_packed_macros()
#
# Emit the packed initializer macros.  Each option is encoded as a single
# integer, in as few bits as practical, made up of (from the least
# significant bits upwards):
#
#   - the offset of the short option character in SOPT_STRING,
#   - the has_arg value (0, 1 or 2), and
#   - the offset of the long option name in lopt_strings.
#
# Currently, this only works if none of the options use action specifications
# (as these would require encoding user-specified pointer expressions and
# arbitrary int values).
#
# The parameters are local variables only; callers pass no arguments.
function output_packed_macros(i, tmp, accum, max, totalbits)
{
	print "";

	totalbits = 0;

	# Bits needed for an offset into SOPT_STRING.  The largest offset used
	# is the string's length, which stands for "no short option".
	max = length(sopt_string);
	accum = 0;
	i = 1;
	while (i <= max) {
		accum++;
		i *= 2;
	}
	print "#define LOPT_SC_BITS " accum;
	totalbits += accum;

	# Bits needed for the largest has_arg value in use.
	max = 0;
	for (i in optionspec) {
		tmp = optionspec[i];
		sub(/,.*/, "", tmp);
		if (tmp > max)
			max = tmp;
	}
	if (max > 1)
		accum = 2;
	else if (max > 0)
		accum = 1;
	else
		accum = 0;
	print "#define LOPT_HA_BITS " accum;
	totalbits += accum;

	# Bits needed for the largest offset into lopt_strings.
	max = 0;
	for (i in offsets) {
		if (offsets[i] > max)
			max = offsets[i];
	}
	accum = 0;
	i = 1;
	while (i <= max) {
		accum++;
		i *= 2;
	}
	print "#define LOPT_LS_BITS " accum;
	totalbits += accum;

	print "#define LOPT_PACK_BITS " totalbits;

	# Round up to a power of two, no smaller than 8.
	i = 8;
	while (i < totalbits)
		i *= 2;
	print "#define LOPT_PACK_BITS2 " i;
	print "#define LOPT_PACK_TYPE uint_least" i "_t"

	# Now the packed initializer itself.  Each element is printed one
	# iteration late so that the final one gets no trailing separator.
	print "\n#define LOPTS_PACKED_INITIALIZER \\";
	accum = "";
	for (i = 0; i < count; i++) {
		if (accum != "")
			print "\t" accum ", \\";

		# max: has_arg, the first field of the option's spec
		max = tmp = optionspec[options[i]];
		sub(/,.*/, "", max)

		# tmp: val, the last field; a quoted character is a short option
		sub(/.*[, ]/, "", tmp);
		if (tmp ~ /^[']/)
			tmp = index(sopt_string, substr(tmp, 2, 1)) - 1;
		else
			tmp = length(sopt_string);

		accum = "(((" offsets[options[i]] "ul<<LOPT_HA_BITS)|" max ")<<LOPT_SC_BITS)|" tmp;
	}
	if (accum != "")
		print "\t" accum;

	# Finally, the unpack helper macros
	tmp = "(x) & ((1ul<<LOPT_SC_BITS)-1)";
	print "\n#define LOPT_UNPACK_VAL(x) \\"
	print "\t( SOPT_STRING[" tmp "] \\";
	print "\t? SOPT_STRING[" tmp "] \\";
	print "\t: 1u + UCHAR_MAX + ((x)>>(LOPT_SC_BITS+LOPT_HA_BITS)))";

	print "\n#define LOPT_UNPACK_ARG(x) \\";
	print "\t(((x)>>LOPT_SC_BITS)&((1ul<<LOPT_HA_BITS)-1))";

	print "\n#define LOPT_UNPACK_NAME(x) \\"
	print "\t(lopt_strings+((x)>>(LOPT_SC_BITS+LOPT_HA_BITS)))";

	print "\n#define LOPT_UNPACK(opt, x) do { \\";
	print "\t(opt).name = LOPT_UNPACK_NAME(x); \\"
	print "\t(opt).has_arg = LOPT_UNPACK_ARG(x); \\"
	print "\t(opt).val = LOPT_UNPACK_VAL(x); \\"
	print "} while (0)";
}
# bucketsort(dst, src)
#
# Sort the elements of src by descending string length, placing them into
# dst[0] ... dst[n-1], where n is the number of elements in src.  Empty
# strings are sorted last.  The relative order of elements with equal
# length is unspecified.  Elements of dst at index n and above are left
# untouched.
#
# Returns n, the number of elements.
#
# The max, count, i and t parameters are local variables; callers pass only
# dst and src.
function bucketsort(dst, src, max, count, i, t)
{
	# Note: ULTRIX 4.5 nawk does not support local array parameters
	split("", bucketsort_buckets);

	# Pass 1: count how many elements there are of each length.
	max = count = 0
	for (t in src) {
		i = length(src[t])
		if (i > max) { max = i }
		bucketsort_buckets[i]++
	}

	# Pass 2: replace each count with the index of the first destination
	# slot for that length.  The walk goes all the way down to length 0 so
	# that empty strings are given slots (and counted) too.
	for (i = max; i >= 0; i--) {
		if (i in bucketsort_buckets) {
			t = bucketsort_buckets[i]
			bucketsort_buckets[i] = count
			count += t
		}
	}

	# Pass 3: drop each element into the next free slot of its bucket.
	for (t in src) {
		i = length(src[t])
		dst[bucketsort_buckets[i]++] = src[t]
	}

	return count
}
# to_enum(lopt)
#
# Return the name of the enumeration constant for a long option: the string
# LOPT_xxx, where xxx is the argument converted to uppercase with every
# character other than an uppercase letter or digit replaced by an
# underscore.
function to_enum(lopt)
{
	# The underscore in the prefix is itself replaced by an underscore, so
	# the substitution can safely run over the complete result.
	lopt = "LOPT_" toupper(lopt)
	gsub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789]/, "_", lopt)
	return lopt
}
# add_to_strtab(strtab, str, offsets)
#
# Append string to strtab if there is not already a matching string present
# in the table.  A match is any existing entry which is equal to str or which
# ends with str, including the most recently added entry.  Newly-added
# strings are separated by "\1", which must be translated into null bytes
# afterwards; the final entry has no separator after it and relies on the
# table being emitted as a null-terminated string.
#
# The updated strtab is returned, and the offsets[str] array member is
# updated with the position (counting from 0) of str in the strtab.
#
# For optimal results, strings should be added in descending length order.
#
# The pos parameter is a local variable; callers pass only the first three
# arguments.
function add_to_strtab(strtab, str, offsets, pos)
{
	# Search with a separator appended to the table so that the last entry,
	# which has none of its own, can be matched like every other entry.
	pos = index(strtab "\1", str "\1") - 1
	if (pos < 0) {
		pos = length(strtab)
		if (pos) {
			# Skip over the separator which precedes the new entry.
			strtab = strtab "\1" str
			pos++
		} else {
			strtab = strtab str
		}
	}

	offsets[str] = pos
	return strtab
}