3 # Copyright © 2021, 2023-2024 Nick Bowler
5 # Generate a C string table based on an input string specification file.
7 # A string table is a single large char array containing all of
8 # the specified (0-terminated) strings, which is then offset to obtain
9 # the desired string. By storing these offsets instead of string pointers
10 # into read-only data structures, this can reduce the need for relocation
11 # processing at startup when programs are built in PIC mode.
13 # The string specification file is processed line by line. Comment
14 # lines may be included by beginning the line with a # character, which
15 # must be the very first character on the line. If a comment is encountered,
16 # processing immediately moves on to the next line and the result is as if
17 # the comment line were omitted from the input.
19 # Options may be used to alter the normal behaviour. An option is placed
20 # on a line by itself beginning with an @ character, and may appear anywhere
21 # in the input file. The following options are defined:
24 # All strings will have a non-zero offset in the strtab.
27 # Instead of a variable declaration, the generated header will define an
28 # object-like macro that can be used as the initializer for a char array.
30 # A string is defined by beginning a line with one or two & characters, which
31 # must be immediately followed by a C identifier. Two & characters indicate
32 # a string that should not be translated, as described below. A nonempty
33 # sequence of whitespace (with at most one newline) separates the identifier
34 # from the beginning of the string itself. This whitespace is never included
37 # The string is then interpreted as follows:
39 # - Leading blanks on each line are ignored.
40 # - The sequences \\, \a, \b, \t, \n, \v, \f and \r can be entered and
41 # mean the same as they do in C string literals. The "\\" sequence
42 # prevents any special interpretation of the second backslash.
43 # - Newlines in the input are included in the output, except for the
44 # case where the entire string (including its identifier) is on one line.
45 # - If this is not desired, a newline which is immediately preceded by an
46 # unescaped backslash will be deleted, along with the backslash.
47 # - All other backslashes are deleted. This can be used to prevent special
48 # handling of whitespace, # or & characters at the beginning of a line.
50 # Unless the @macro option is specified, the output defines a variable,
51 # strtab, which contains all of the strings, and each identifier in the input
52 # is declared as an enumeration constant whose value is the offset of the
53 # associated string within strtab. Otherwise, if the @macro option is
54 # specified, no variables are defined and the STRTAB_INITIALIZER object-like macro
55 # may be used to initialize a char array with static storage duration.
57 # Normally, the generated source code wraps strings using the identity macro
58 # N_(x), which has no effect on the resulting data structures but enables tools
59 # such as xgettext to extract translatable strings from the source code. An
60 # identifier preceded by two ampersands (&&) suppresses this output to allow
61 # a single string table to contain both translatable strings and
62 # ones that should not be translated.
64 # The object-like macro STRTAB_MAX_OFFSET is defined and expands to the
65 # greatest string offset, suitable for use in #if preprocessing directives.
67 # License WTFPL2: Do What The Fuck You Want To Public License, version 2.
68 # This is free software: you are free to do what the fuck you want to.
69 # There is NO WARRANTY, to the extent permitted by law.
# Banner for the generated output: credits gen-strtab.awk (the first variant
# also names the input file via FILENAME) and warns against hand-editing.
74 print " * Automatically generated by gen-strtab.awk from " FILENAME
76 print " * Automatically generated by gen-strtab.awk"
78 print " * Do not edit."
83 # Check if "\\\\" in substitutions gives just one backslash.
# The probe substitutes "\\\\" for the "x" in bs and measures the result.
# If the result has length 1, bs becomes "\\\\"; otherwise it becomes "\\".
# bs is used afterwards as replacement text in gsub() calls wherever a
# backslash has to be produced in the output.
84 bs =
"x"; sub(/x
/, "\\\\", bs
);
85 bs =
(length(bs
) ==
1 ?
"\\\\" : "\\");
# Input state: the text collected for the string currently being defined
# and the identifier it belongs to both start out empty.
89 collected = ident =
""
# startline and endline start at 0; finish_string_input compares them
# (endline > startline) before removing backslash-newline pairs.
90 startline = endline =
0
# Skip records that have no fields and records whose very first character
# is #; nothing else is done for such lines.
95 NF ==
0 || $
0 ~
/^
[#]/ { next }
# Strip a leading "no" (optionally followed by "_") from $1.  sub() returns
# the number of replacements, so val is 1 when there was no such prefix and
# 0 when the prefix was removed.
102 val = !
sub(/^no_?
/, "", $
1);
# Report an unknown option on standard error by piping the message through
# "cat 1>&2".
# NOTE(review): orig presumably holds the option text as written in the
# input, before the "no" prefix was stripped -- confirm.
106 print "error: unrecognized option: @" orig
| "cat 1>&2"
# Finalize the string collected so far under its identifier, then append
# that identifier to vars so the input order is remembered.
115 finish_string_input
(strings
, ident
, collected
);
116 vars
[num_vars
++] = ident
;
# Remove one leading & from the record if present (sub() with no target
# operates on $0).  current_l10n is 1 when no & was found, 0 when one was
# removed.
# NOTE(review): presumably the first & of the definition has already been
# stripped at this point, so this detects the && (do not translate) form --
# confirm.
119 current_l10n = !
sub(/^
[&]/, "");
# Append the current record to the collected text, inserting a newline
# between records but not before the first one.
130 sep = collected
!= "" ?
"\n" : "";
131 collected = collected sep $
0;
# Finalize the last pending string and record its identifier, in the same
# way as is done when a new definition begins.
137 finish_string_input
(strings
, ident
, collected
)
138 vars
[num_vars
++] = ident
# Order the strings with bucketsort (documented as descending length) into
# sorted_strings[0..count-1].  Longer strings are placed first so that a
# later string can be found as the tail of an already-placed entry.
145 count = bucketsort
(sorted_strings
, strings
)
# STR_L10N_(x) is defined with an empty expansion; N_(x) is defined as the
# identity macro.
148 print "\n#define STR_L10N_(x)"
150 print "# define N_(x) x"
# Two alternative openers for the table: the STRTAB_INITIALIZER macro form
# and the "static const char strtab[]" variable form.
# NOTE(review): cont is appended to lines that belong to the table;
# presumably it is a line-continuation suffix in macro mode -- confirm.
154 print "\n#define STRTAB_INITIALIZER" cont
;
156 print "\nstatic const char strtab[] =";
# Place each string into the table, visiting them in bucketsort order.
159 for (i =
0; i
< count
; i
++) {
160 s = sorted_strings
[i
]
# Swap every escaped backslash pair in s for the placeholder \2, so that a
# pair is a single character from here on.
161 gsub(/\\\\/, "\2", s
)
# \1 separates entries in strtab.  Finding s followed by \1 inside
# strtab "\1" means s is already present as the tail of an existing entry.
162 if ((n =
index(strtab
"\1", s
"\1")) > 0) {
# Share that storage: the offset is the real length of everything that
# precedes the match position.
163 offsets
[sorted_strings
[i
]] = real_length
(substr(strtab
, 1, n
-1));
# Strings that are not listed in nol10n get a STR_L10N_(N_("...")) line of
# their own (presumably so that extraction tools still see a string which
# has no entry of its own in the table -- confirm).
164 if (!
(sorted_strings
[i
] in nol10n
))
165 print "\tSTR_L10N_(N_(\"" sorted_strings
[i
] "\"))" cont
;
# Append s as a new entry after a \1 separator.  The offset is one past the
# current table length, and the length grows by the real length of s plus
# one.
167 strtab = strtab
"\1" s
168 offsets
[sorted_strings
[i
]] = strtab_len
+ 1
169 strtab_len
+= real_length
(s
) + 1
# Variant that records offset 0 and adds only the real length of s, with no
# extra separator counted.
# NOTE(review): presumably the case of the very first entry in the table --
# confirm against the selecting condition.
172 offsets
[sorted_strings
[i
]] =
0
173 strtab_len
+= real_length
(s
)
# Undo the placeholder substitution: every \2 in the table is replaced
# using bs bs as the replacement text.
177 gsub("\2", bs bs
, strtab
);
# Break the table into its \1-separated entries; n is the entry count.
178 n =
split(strtab
, split_strtab
, "\1");
179 for (i =
1; i
<= n
; i
++) {
# Leading column, four characters wide: the string literal "\0" is printed
# when i is greater than !!opts["zero"] (that is, 0 or 1); otherwise the
# column is left blank.
180 printf("\t%4s ", i
> !!opts
["zero"] ?
"\"\\0\"" : "");
# Entries listed in nol10n are printed as plain string literals; every
# other entry is wrapped in N_().
182 if (split_strtab
[i
] in nol10n
) {
183 print "\"" split_strtab
[i
] "\"" cont
;
185 print "N_(\"" split_strtab
[i
] "\")" cont
;
# Close the table with an empty string literal.  substr(";", 1, len) yields
# ";" when opts["macro"] is false and nothing when it is true.
188 print "\t\"\"" substr(";", 1, !opts
["macro"]);
# Emit one "name = offset" line per recorded identifier.
191 for (i =
0; i
< num_vars
; i
++) {
# Every line except the last one ends with a comma.
192 sep =
(i
+1) != num_vars ?
"," : ""
# offsets is keyed by the processed string text, so the lookup goes through
# strings[s].  One is added to the stored offset when opts["zero"] is false.
# NOTE(review): s is presumably the identifier taken from vars[i] -- confirm.
194 o = offsets
[strings
[s
]] + (!opts
["zero"])
195 print "\t" s
" = " o sep
# NOTE(review): max is presumably the greatest o printed by the loop above,
# as described in the header documentation -- confirm.
201 print "\n#define STRTAB_MAX_OFFSET " max
204 # finish_string_input(strings, ident, val)
206 # Deal with backslash-escapes and special characters in val, then set
207 # strings[ident] = val.
# The value stored is the processed text built up in tmpval.  n and tmpval
# are extra parameters used as local variables.  The globals bs, startline
# and endline are read.
208 function finish_string_input
(strings
, ident
, val
, n
, tmpval
)
# Protect escaped backslash pairs by turning each one into the placeholder
# \2, so the steps below do not treat either backslash as an escape.
210 gsub(/\\\\/, "\2", val
);
211 if (endline
> startline
)
# Delete every backslash that is immediately followed by a newline, together
# with that newline.
213 gsub(/\\\n/, "", val
);
# For each backslash that is not followed by one of a, b, t, n, v, f or r:
# move the text before it to tmpval and continue with the text after the
# backslash, which drops the backslash itself.
216 while ((n =
match(val
, /\\[^abtnvfr
]/)) > 0) {
217 tmpval = tmpval
substr(val
, 1, n
-1);
218 val =
substr(val
, n
+1);
222 # Escape special characters
# Double quotes, tabs and newlines get replacement text built from bs
# followed by the quote, "t" or "n"; the \2 placeholders are replaced using
# bs bs.
223 gsub(/"/, bs"\"", tmpval);
224 gsub(/\t/, bs"t
", tmpval);
225 gsub(/\n/, bs"n
", tmpval);
226 gsub("\
2", bs bs, tmpval);
228 strings[ident] = tmpval;
# real_length(s)
#
# Returns t minus the number of backslash-plus-one-character sequences found
# in s.  The gsub() call replaces each match with itself ("&"), so it leaves
# s unchanged and serves only to count the matches.  t is an extra parameter
# used as a local variable.
# NOTE(review): t presumably holds length(s), which would make the result
# the length of s with each escape sequence counted as one character --
# confirm.
234 function real_length(s, t)
237 return t - gsub(/\\./, "&", s)
240 # bucketsort(dst, src)
242 # Sort the elements of src by descending string length,
243 # placing them into dst[0] ... dst[n-1], where n is the number of elements.
245 # Returns the number of elements.
246 function bucketsort(dst, src, max, count, i, t)
248 # Note: ULTRIX 4.5 nawk does not support local array parameters
249 split("", bucketsort_buckets);
253 if (i > max) { max = i }
254 bucketsort_buckets[i]++
257 for (i = max; i > 0; i--) {
258 if (i in bucketsort_buckets) {
259 t = bucketsort_buckets[i]
260 bucketsort_buckets[i] = count
266 i = length(t = src[t])
267 dst[bucketsort_buckets[i]++] = t