3 # Generate definitions helpful when using getopt_long from an options
6 # Copyright © 2021, 2023-2024 Nick Bowler
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 # The options specification file is processed line by line. Any line
22 # beginning with a - character introduces a new option definition. Each
23 # option definition specifies any or all of a short option name, a long
24 # option name, an argument specification, and an action specification.
26 # Only the long option name is mandatory. It is not possible to define
27 # short options without a corresponding long option.
29 # The optional short option name is first, and consists of a hyphen (which
30 # must be the first character on the line) followed by the one character
31 # short option name, followed by a comma.
33 # The long option name is next on the line, which consists of two hyphens
34 # followed by the desired option name. If the short option name was omitted,
35 # then the first hyphen of the long option name must be the first character
38 # The argument specification is next, consisting of an equals sign followed by
39 # the argument name. The argument name can be any sequence of non-whitespace
40 # characters and is only relevant for --help text.
42 # If the argument specification is surrounded by square brackets, this
43 # indicates an optional argument. If the argument specification is omitted
44 # completely, this option has no argument. Otherwise, the option has a
47 # Finally, the optional action specification defines how the "flag" and
48 # "val" members are set in the option structure for this option. An action
49 # specification may only be provided for options with no short name.
51 # If the action specification is omitted, then flag will be set to a null
52 # pointer and val is set to the short option character, if any, otherwise the
53 # unique enumeration constant LOPT_xxx for this option (described below).
55 # The action specification can be of the form (val) or (flag, val), where flag
56 # and val are C expressions suitable for use in an initializer for objects
57 # with static storage duration. Neither flag nor val may contain commas or
58 # whitespace. In the first form, the option's flag is set to a null pointer.
60 # Any amount of whitespace may follow the short option name, the argument
61 # specification, the action specification, or the comma within an action
62 # specification. Whitespace is not permitted between a long option name
63 # and an argument specification.
65 # Examples of option specifications:
71 # --parse-only (&parse_only, 1)
73 # Each option is assigned an enumeration constant of the form LOPT_xxx,
74 # where xxx is the long option name with all letters in uppercase and
75 # all non-alphanumeric characters replaced with underscores. The value
76 # of the constants is unspecified, except that they will be unique across
77 # all defined options and distinct from the integer value of any short
80 # The object-like macro SOPT_STRING expands to a string literal suitable
81 # for use as the optstring argument to getopt et al.
83 # The object-like macro LOPTS_INITIALIZER expands to a comma-separated
84 # sequence of struct option initializers, suitable for use in a declaration
85 # of an array of struct option elements with static storage duration. The
86 # all-zero terminating element required by getopt_long must be added by the
89 # static const struct option lopts[] = { LOPTS_INITIALIZER, {0} };
91 # If none of the options have action specifications, then an alternate
92 # set of macros is also defined, which encode the struct option array
93 # into a more compact format that can be used to generate the full
94 # 'struct option' array at runtime:
96 # * the object-like macro LOPT_PACK_BITS expands to an integer constant
97 # expression, suitable for use in #if directives, that specifies the
98 # minimum number of bits required by the encoding. LOPT_PACK_BITS2
99 # is the same, but rounded up to the next power of two greater than
102 # * the object-like macro LOPTS_PACKED_INITIALIZER expands to a
103 # comma-separated sequence of integer constant expressions, suitable
104 # for initializing an array of integers. All values are less than
107 # * the function-like macro LOPT_UNPACK(opt, x), where opt is an
108 # lvalue of type 'struct option', and x is one of the array
109 # elements initialized by LOPTS_PACKED_INITIALIZER. This expands
110 # the encoded value and sets the name, has_arg and val members of
111 # opt appropriately. The caller should ensure that the flag member
114 # The help text for an individual struct option element may be obtained by
117 # struct lopt_help { const char *desc, *arg; }
118 # *lopt_get_help(const struct option *opt, struct lopt_help *out);
120 # The returned desc and arg pointers point to the help text and the argument
121 # name for the option, respectively, as written in the options specification file.
126 print " * Automatically generated by gen-options.awk from " FILENAME
128 print " * Automatically generated by gen-options.awk"
130 print " * Do not edit."
135 # Check if "\\\\" in substitutions gives just one backslash.
136 bs =
"x"; sub(/x
/, "\\\\", bs
);
137 bs =
(length(bs
) ==
1 ?
"\\\\" : "\\");
146 # Parse option specifier lines
149 arg = lopt = sopt =
""
152 # Extract short option name
153 if (work ~
/^
-[^
-]/) {
154 sopt =
substr(work
, 2, 1)
155 sub(/^
-.
,[ \t]*/, "", work
)
158 # Extract long option name
160 if (n =
match(work
, /[=
\t[]/)) {
161 lopt =
substr(work
, 3, n
-3)
162 work =
substr(work
, n
)
164 lopt =
substr(work
, 3)
169 # Extract argument name
170 if (work ~
/^\
[=
[^
\t]+\
]/ && sub(/\
]/, "&", work
) ==
1) {
171 if (n =
index(work
, "]")) {
172 arg =
substr(work
, 3, n
-3)
173 work =
substr(work
, n
+1)
176 } else if (work ~
/^=
/) {
177 if (n =
match(work
, /[ \t]/)) {
178 arg =
substr(work
, 2, n
-2)
179 work =
substr(work
, n
)
181 arg =
substr(work
, 2)
188 sub(/^
[ \t]*/, "", work
)
189 if (!sopt
&& work ~
/^\
([^
, \t]+(,[ \t]*[^
, \t]+)?\
)/) {
190 # packed form is not possible w/ actions
193 n =
split(work
, a
, ",[ \t]*")
195 flag =
substr(a
[1], 2) ", " substr(a
[2], 1, length(a
[2])-1)
197 flag =
"0, " substr(a
[1], 2, length(a
[1])-2)
199 sub(/^\
([^
, \t]+(,[ \t]*[^
, \t]+)?
/, "", work
)
201 flag =
"0, '" sopt
"'"
203 flag =
"0, " to_enum
(lopt
)
207 print "invalid option specification:", $
0 > "/dev/stderr"
213 sopt_string = sopt_string sopt
substr("::", 1, has_arg
)
215 options
[num_options
++] = lopt
216 optionspec
[lopt
] = has_arg
", " flag
218 optionarg
[lopt
] = arg
224 # Ignore any line beginning with a #
230 if (lopt in optionhelp
)
232 optionhelp
[lopt
] = optionhelp
[lopt
] $
0;
236 # Exit immediately on error
239 print "#include <limits.h>\n";
240 print "#define SOPT_STRING \"" sopt_string
"\"";
243 count = bucketsort
(sorted_options
, options
);
244 for (i =
0; i
< count
; i
++) {
245 lopt_strings = add_to_strtab
(lopt_strings
, sorted_options
[i
], offsets
);
247 gsub("\1", bs
"0"bs
"\n", lopt_strings
);
248 print "static const char lopt_strings[] = \"\\\n" lopt_strings
"\";";
251 for (i =
0; i
< count
; i
++) {
253 sep =
(i
+1 == count ?
"" : ",")
255 print "\t" to_enum
(opt
), "= UCHAR_MAX+1 +", offsets
[opt
] sep
258 print "#define lopt_str(x) (lopt_strings + (LOPT_ ## x - UCHAR_MAX - 1))"
261 output_packed_macros
()
264 print "\n#define LOPTS_INITIALIZER \\"
265 for (i =
0; i
< count
; i
++) {
267 sep =
(i
+1 == count ?
"" : ", \\")
269 print "\t/* --" opt
, "*/ \\"
270 print "\t{ lopt_strings+" offsets
[opt
] ",", optionspec
[opt
] " }" sep
273 output_help_function
();
276 # Emit the lopt_get_help function, for generating --help output from the
277 # options description.
278 function output_help_function
(strings
, count
, i
)
280 print "\nstatic struct lopt_help { const char *desc, *arg; }";
281 print "*lopt_get_help(const struct option *opt, struct lopt_help *out)";
284 count = bucketsort
(help_sorted
, optionhelp
);
285 for (i =
0; i
< count
; i
++) {
286 strings = add_to_strtab
(strings
, help_sorted
[i
], help_offsets
);
289 count = bucketsort
(help_sorted
, optionarg
);
290 for (i =
0; i
< count
; i
++) {
291 strings = add_to_strtab
(strings
, help_sorted
[i
], help_arg_offsets
);
294 # ensure the empty string is in the offsets table
295 strings = add_to_strtab
(strings
, "", help_offsets
);
297 gsub("\"", bs
"\"", strings
);
298 gsub("\n", bs
"n"bs
"\n", strings
);
299 gsub("\1", bs
"0"bs
"\n", strings
);
300 print "\tstatic const char help_strings[] = \"\\\n" strings
"\";";
304 if (i in optionhelp
) {
306 gsub("\"", bs
"\"", i
);
307 gsub("\n", bs
"n\"\n\t \"", i
);
308 print "\tgettext(\"" i
"\");";
310 print "\tpgettext(\"" i
"\", \"\");";
313 for (i in optionarg
) {
314 print "\tpgettext(\"" i
"\", \"" optionarg
[i
] "\");";
318 print "\n\tswitch((opt->name - lopt_strings) + UCHAR_MAX + 1) {";
319 for (i in optionhelp
) {
320 print "\tcase " to_enum
(i
) ":";
321 print "\t\tout->desc = help_strings+" help_offsets
[optionhelp
[i
]] ";";
322 if (i in optionarg
) {
323 print "\t\tout->arg = help_strings+" help_arg_offsets
[optionarg
[i
]] ";";
325 print "\t\treturn out;";
327 for (i in optionarg
) {
328 if (!
(i in optionhelp
)) {
329 print "\tcase " to_enum
(i
) ":";
330 print "\t\tout->arg = help_strings+" help_arg_offsets
[optionarg
[i
]] ";";
337 if (!
(options
[i
] in optionhelp
)) {
338 print "\n\tout->desc = help_strings+" help_offsets
[""] ";";
339 print "\treturn out;";
347 # Emit the packed initializer macros. This is used as an array initializer
348 # that encodes the following information:
350 # - short option character offset
351 # - arg value (0, 1 or 2), and
352 # - long option string offset
354 # as a single integer value for each option, in as few bits as practical.
356 # Currently, this only works if none of the options use action specifications
357 # (as these would require encoding user-specified pointer expressions and
358 # arbitrary int values).
359 function output_packed_macros
(i
, tmp
, accum
, max
, totalbits
)
363 # determine number of bits to encode offsets in SOPT_STRING
364 max =
length(sopt_string
);
365 totalbits = accum =
0;
366 for (i =
1; i
<= max
; i
*=
2) {
369 print "#define LOPT_SC_BITS " accum
;
372 # determine number of bits to encode has_arg values
374 for (i in optionspec
) {
375 tmp = optionspec
[i
]; sub(/,.
*/, "", tmp
);
379 accum =
(max
> 1 ?
2 : max
> 0 ?
1 : 0);
380 print "#define LOPT_HA_BITS " accum
;
383 # determine number of bits to encode offsets in lopt_strings
386 if (offsets
[i
] > max
)
391 for (i =
1; i
<= max
; i
*=
2) {
394 print "#define LOPT_LS_BITS " accum
;
397 print "#define LOPT_PACK_BITS " totalbits
;
398 for (i =
8; i
< totalbits
; i
*=
2)
400 print "#define LOPT_PACK_BITS2 " i
;
401 print "#define LOPT_PACK_TYPE uint_least" i
"_t"
403 # Now emit the packed initializer macro
404 print "\n#define LOPTS_PACKED_INITIALIZER \\";
406 for (i =
0; i
< count
; i
++) {
408 print "\t" accum
", \\";
411 accum =
"("offsets
[tmp
] "ul" "<<LOPT_HA_BITS)";
412 max = tmp = optionspec
[tmp
];
414 accum =
"((" accum
"|" max
")<<LOPT_SC_BITS)";
416 sub(/.
*[, ]/, "", tmp
);
418 tmp = index(sopt_string, substr(tmp, 2, 1)) - 1;
420 tmp = length(sopt_string);
422 accum = accum "|" tmp;
428 # Finally, the unpack helper macros
429 tmp = "(x) & ((1ul<<LOPT_SC_BITS)-1)";
430 print "\n#define LOPT_UNPACK_VAL(x) \\"
431 print "\t( SOPT_STRING[" tmp "] \\";
432 print "\t? SOPT_STRING[" tmp "] \\";
433 print "\t: 1u + UCHAR_MAX + ((x)>>(LOPT_SC_BITS+LOPT_HA_BITS)))";
435 print "\n#define LOPT_UNPACK_ARG(x) \\";
436 print "\t(((x)>>LOPT_SC_BITS)&((1ul<<LOPT_HA_BITS)-1))";
438 print "\n#define LOPT_UNPACK_NAME(x) \\"
439 print "\t(lopt_strings+((x)>>(LOPT_SC_BITS+LOPT_HA_BITS)))";
441 print "\n#define LOPT_UNPACK(opt, x) do { \\";
442 print "\t(opt).name = LOPT_UNPACK_NAME(x); \\"
443 print "\t(opt).has_arg = LOPT_UNPACK_ARG(x); \\"
444 print "\t(opt).val = LOPT_UNPACK_VAL(x); \\"
448 # bucketsort(dst, src)
450 # Sort the elements of src by descending string length,
451 # placing them into dst[0] ... dst[n-1].
453 # Returns the number of elements, n.
454 function bucketsort(dst, src, max, count, i, t)
456 # Note: ULTRIX 4.5 nawk does not support local array parameters
457 split("", bucketsort_buckets);
461 if (i > max) { max = i }
462 bucketsort_buckets[i]++
465 for (i = max; i > 0; i--) {
466 if (i in bucketsort_buckets) {
467 t = bucketsort_buckets[i]
468 bucketsort_buckets[i] = count
474 i = length(t = src[t])
475 dst[bucketsort_buckets[i]++] = t
483 # Return the string LOPT_xxx, where xxx is the argument with all lowercase
484 # letters converted to uppercase, and all non-alphanumeric characters replaced
486 function to_enum(lopt)
489 gsub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789]/, "_", lopt)
493 # add_to_strtab(strtab, str, offsets)
495 # Append string to strtab if there is not already a matching string present
496 # in the table. Newly-added strings are separated by "\1", which must be
497 # translated into null bytes afterwards. The updated strtab is returned, and
498 # the offsets[str] array member is updated with the position (counting from 0)
499 # of str in the strtab.
501 # For optimal results, strings should be added in descending length order.
502 function add_to_strtab(strtab, str, offsets, pos)
504 if ( (pos = index(strtab, str "\1") - 1) < 0) {
505 pos = length(strtab);
507 strtab = strtab "\1" str;