gen-tree.awk: Improve compatibility with vintage compilers.
[dxcommon.git] / scripts / gen-tree.awk
blob1c80fafaf6753a6f78d032fff86cea875ac78ba7
1 #!/bin/awk -f
3 # Generate one or more C array initializers to encode simple tree structures
4 # in a compact format.
6 # Copyright © 2021-2024 Nick Bowler
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 # Each nonempty line of the input file is either a comment, an option
22 # specification or a tree specification. Each line is distinguished by
23 # its first character. A # character introduces a comment, an @ character
24 # introduces an option specification (described later), and all other
25 # nonempty lines are tree nodes.
27 # The first field of a tree specification must be a valid C identifier,
28 # optionally followed by a comma. The identifiers used on non-leaf nodes
29 # must be unique with respect to other non-leaf nodes, but leaf nodes may
30 # share the same identifier as other nodes.
32 # The tree structure is expressed through indentation. Lines with no leading
33 # whitespace designate root nodes. When the number of white-space characters
34 # at the beginning of a line is greater than that of the previous line, this
35 # introduces a new subtree rooted at the previous node. When the number of
36 # leading white-space characters is less than the previous line, this concludes
37 # the definition of the current subtree.
39 # For example, the input on the left describes the tree on the right.
41 # INPUT: root TREE: root -- a -- b -- c
42 # a | |
43 # b | + -- d
44 # c |
45 # d + -- e -- f
46 # e |
47 # f + -- g
48 # g
50 # For each root node X, the object-like macro X_INITIALIZER is defined. This
51 # can be used to initialize an array with static storage duration. Each line
52 # of the input is defines the initializer for one array element. These are
53 # then ordered topologically beginning with the root, but the root itself is
54 # not included. Each subtree is terminated by an "empty" element (with an
55 # initializer of {0}).
57 # If there is a comma after the identifier which begins a tree element, or
58 # if the identifier is the only text on the line, then the element initializer
59 # is constructed by surrounding the entire line by braces. Otherwise, the
60 # identifier is removed and the remainder of the line is surrounded by braces
61 # to construct the initializer. This allows the exact form of the initializer
62 # to be customized.
64 # For example, the above input will produce this (or an equivalent topological
65 # ordering) initializer:
67 # OUTPUT: #define root_INITIALIZER \
68 # {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0}
70 # Constants representing the array offset for each subtree are additionally
71 # defined. For a non-leaf node X, the enumeration constant X_OFFSET gives the
72 # index of the first node rooted at X. With the above example, the results
73 # are:
75 # OUTPUT: enum { a_OFFSET = 3, b_OFFSET = 9, e_OFFSET = 6 };
77 # By using an array of structures to represent the tree and including these
78 # constants in initializers, it is possible for the flattened tree to describe
79 # its own structure.
81 # Finally, each node identifier is defined as an enumeration constant
82 # representing the nonzero offset into a string table tree_strtab. These
83 # constants will normally be in scope when using the root_INITIALIZER macro,
84 # which means the resulting array initializers will use them.
86 # Options may be used to alter the normal behaviour. They may be placed
87 # anywhere in the input file. The following options are defined:
89 # @nostrtab
90 # Do not output a definition for tree_strtab or its associated
91 # enumeration constants.
93 END {
94 print "/*"
95 if (FILENAME) {
96 print " * Automatically generated by gen-tree.awk from " FILENAME
97 } else {
98 print " * Automatically generated by gen-tree.awk"
100 print " * Do not edit."
101 print " */"
104 BEGIN {
105 # Check if "\\\\" in substitutions gives just one backslash.
106 bs = "x"; sub(/x/, "\\\\", bs);
107 bs = (length(bs) == 1 ? "\\\\" : "\\");
109 opts["strtab"] = 1;
111 depth = max_depth = 0;
112 num_entries = 0;
114 tree_name = "";
115 entry_name = "";
117 indent_stack[0] = 0;
120 # Comments
121 NF == 0 { next }
122 $0 ~ /^#/ { next }
124 # Options
125 sub(/^@/, "", $0) {
126 if (NF == 1) {
127 orig=$1
128 gsub(/-/, "_", $1);
129 val = !sub(/^no_?/, "", $1);
130 if ($1 in opts) {
131 opts[$1] = val;
132 } else {
133 print "error: unrecognized option: @" orig | "cat 1>&2"
134 exit 1
137 next
140 { indent = index($0, $1) - 1 }
142 indent > 0 {
143 if (indent > indent_stack[depth]) {
144 indent_stack[++depth] = indent;
145 if (depth > 1) {
146 subtree_offsets[entry_name] = level_count[depth];
147 subtree_depth[entry_name] = depth;
149 } else {
150 while (indent < indent_stack[depth]) {
151 tree_items[depth] = tree_items[depth] ", \\\n\t{ 0 }";
152 level_count[depth]++;
153 depth--;
157 entry_name = $1; sub(/,$/, "", entry_name);
158 all_items[num_entries++] = entry_name;
160 # Construct the element initializer for this tree node.
161 $1 = " " $1;
162 if (NF > 1) {
163 # Check if entry name is followed by a comma. If it is not, the entry
164 # name is excluded from the initializer.
165 check_str = $1$2;
166 gsub(/[ \t]/, "", check_str);
167 if (index(check_str, entry_name ",") == 0) {
168 $1 = "";
171 $1 = ", \\\n\t{" $1; $NF = $NF " }";
173 tree_items[depth] = tree_items[depth] $0;
174 level_count[depth]++;
177 indent == 0 && tree_identifier != "" {
178 trees[tree_identifier] = format_items();
179 tree_identifier = "";
181 END {
182 if (tree_identifier != "")
183 trees[tree_identifier] = format_items()
185 indent == 0 { tree_identifier = $1 }
187 END {
188 prefix = "\nenum {\n";
190 if (opts["strtab"]) {
191 entry_strtab = "\1";
192 bucketsort(sorted_items, all_items);
193 for (i = 0; i < num_entries; i++) {
194 s = sorted_items[i];
195 if ((n = index(entry_strtab, s "\1")) > 0) {
196 entry_offsets[s] = n-1;
197 } else {
198 entry_offsets[s] = length(entry_strtab);
199 entry_strtab = entry_strtab s "\1";
203 gsub("\1", bs"\n"bs"0", entry_strtab);
204 print "\nstatic const char tree_strtab[] = \"" entry_strtab "\";";
206 for (item in entry_offsets) {
207 printf "%s\t%s = %d", prefix, item, entry_offsets[item];
208 prefix = ",\n";
212 for (i in subtree_offsets) {
213 printf "%s\t%s_OFFSET = %d", prefix, i, subtree_offsets[i];
214 prefix = ",\n";
216 if (!index(prefix, "enum")) {
217 print "\n};";
221 END {
222 for (tree in trees) {
223 print "\n" trees[tree];
227 function format_items(s, i)
229 while (depth > 0) {
230 tree_items[depth] = tree_items[depth] ", \\\n\t{ 0 }";
231 level_count[depth]++;
232 depth--;
235 for (i = 2; tree_items[i] != ""; i++) {
236 level_count[i] += level_count[i-1];
239 for (i in subtree_depth) {
240 subtree_offsets[i] += level_count[subtree_depth[i]-1];
241 delete subtree_depth[i];
244 for (i = 1; tree_items[i] != ""; i++) {
245 s = s tree_items[i];
246 delete tree_items[i];
247 delete level_count[i];
250 sub(/^,/, "", s);
251 return "#define " tree_identifier "_INITIALIZER" s;
254 # bucketsort(dst, src)
256 # Sort the elements of src by descending string length,
257 # placing them into dst[0] ... dst[n].
259 # Returns the number of elements.
260 function bucketsort(dst, src, max, count, i, t)
262 # Note: ULTRIX 4.5 nawk does not support local array parameters
263 split("", bucketsort_buckets);
265 for (t in src) {
266 i = length(src[t])
267 if (i > max) { max = i }
268 bucketsort_buckets[i]++
271 for (i = max; i > 0; i--) {
272 if (i in bucketsort_buckets) {
273 t = bucketsort_buckets[i]
274 bucketsort_buckets[i] = count
275 count += t
279 for (t in src) {
280 i = length(t = src[t])
281 dst[bucketsort_buckets[i]++] = t
284 return count