3 # Generate one or more C array initializers to encode simple tree structures
6 # Copyright © 2021-2024 Nick Bowler
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 # Each nonempty line of the input file is either a comment, an option
22 # specification or a tree specification. Each line is distinguished by
23 # its first character. A # character introduces a comment, an @ character
24 # introduces an option specification (described later), and all other
25 # nonempty lines are tree nodes.
27 # The first field of a tree specification must be a valid C identifier,
28 # optionally followed by a comma. The identifiers used on non-leaf nodes
29 # must be unique with respect to other non-leaf nodes, but leaf nodes may
30 # share the same identifier as other nodes.
32 # The tree structure is expressed through indentation. Lines with no leading
33 # whitespace designate root nodes. When the number of white-space characters
34 # at the beginning of a line is greater than that of the previous line, this
35 # introduces a new subtree rooted at the previous node. When the number of
36 # leading white-space characters is less than the previous line, this concludes
37 # the definition of the current subtree.
39 # For example, the input on the left describes the tree on the right.
41 # INPUT: root TREE: root -- a -- b -- c
50 # For each root node X, the object-like macro X_INITIALIZER is defined. This
51 # can be used to initialize an array with static storage duration. Each line
52 # of the input is defines the initializer for one array element. These are
53 # then ordered topologically beginning with the root, but the root itself is
54 # not included. Each subtree is terminated by an "empty" element (with an
55 # initializer of {0}).
57 # If there is a comma after the identifier which begins a tree element, or
58 # if the identifier is the only text on the line, then the element initializer
59 # is constructed by surrounding the entire line by braces. Otherwise, the
60 # identifier is removed and the remainder of the line is surrounded by braces
61 # to construct the initializer. This allows the exact form of the initializer
64 # For example, the above input will produce this (or an equivalent topological
65 # ordering) initializer:
67 # OUTPUT: #define root_INITIALIZER \
68 # {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0}
70 # Constants representing the array offset for each subtree are additionally
71 # defined. For a non-leaf node X, the enumeration constant X_OFFSET gives the
72 # index of the first node rooted at X. With the above example, the results
75 # OUTPUT: enum { a_OFFSET = 3, b_OFFSET = 9, e_OFFSET = 6 };
77 # By using an array of structures to represent the tree and including these
78 # constants in initializers, it is possible for the flattened tree to describe
81 # Finally, each node identifier is defined as an enumeration constant
82 # representing the nonzero offset into a string table tree_strtab. These
83 # constants will normally be in scope when using the root_INITIALIZER macro,
84 # which means the resulting array initializers will use them.
86 # Options may be used to alter the normal behaviour. They may be placed
87 # anywhere in the input file. The following options are defined:
90 # Do not output a definition for tree_strtab or its associated
91 # enumeration constants.
96 print " * Automatically generated by gen-tree.awk from " FILENAME
98 print " * Automatically generated by gen-tree.awk"
100 print " * Do not edit."
105 # Check if "\\\\" in substitutions gives just one backslash.
106 bs =
"x"; sub(/x
/, "\\\\", bs
);
107 bs =
(length(bs
) ==
1 ?
"\\\\" : "\\");
111 depth = max_depth =
0;
129 val = !
sub(/^no_?
/, "", $
1);
133 print "error: unrecognized option: @" orig
| "cat 1>&2"
140 { indent =
index($
0, $
1) - 1 }
143 if (indent
> indent_stack
[depth
]) {
144 indent_stack
[++depth
] = indent
;
146 subtree_offsets
[entry_name
] = level_count
[depth
];
147 subtree_depth
[entry_name
] = depth
;
150 while (indent
< indent_stack
[depth
]) {
151 tree_items
[depth
] = tree_items
[depth
] ", \\\n\t{ 0 }";
152 level_count
[depth
]++;
157 entry_name = $
1; sub(/,$
/, "", entry_name
);
158 all_items
[num_entries
++] = entry_name
;
160 # Construct the element initializer for this tree node.
163 # Check if entry name is followed by a comma. If it is not, the entry
164 # name is excluded from the initializer.
166 gsub(/[ \t]/, "", check_str
);
167 if (index(check_str
, entry_name
",") ==
0) {
171 $
1 =
", \\\n\t{" $
1; $
NF = $
NF " }";
173 tree_items
[depth
] = tree_items
[depth
] $
0;
174 level_count
[depth
]++;
177 indent ==
0 && tree_identifier
!= "" {
178 trees
[tree_identifier
] = format_items
();
179 tree_identifier =
"";
182 if (tree_identifier
!= "")
183 trees
[tree_identifier
] = format_items
()
185 indent ==
0 { tree_identifier = $
1 }
188 prefix =
"\nenum {\n";
190 if (opts
["strtab"]) {
192 bucketsort
(sorted_items
, all_items
);
193 for (i =
0; i
< num_entries
; i
++) {
195 if ((n =
index(entry_strtab
, s
"\1")) > 0) {
196 entry_offsets
[s
] = n
-1;
198 entry_offsets
[s
] =
length(entry_strtab
);
199 entry_strtab = entry_strtab s
"\1";
203 gsub("\1", bs
"\n"bs
"0", entry_strtab
);
204 print "\nstatic const char tree_strtab[] = \"" entry_strtab
"\";";
206 for (item in entry_offsets
) {
207 printf "%s\t%s = %d", prefix
, item
, entry_offsets
[item
];
212 for (i in subtree_offsets
) {
213 printf "%s\t%s_OFFSET = %d", prefix
, i
, subtree_offsets
[i
];
216 if (!
index(prefix
, "enum")) {
222 for (tree in trees
) {
223 print "\n" trees
[tree
];
227 function format_items
(s
, i
)
230 tree_items
[depth
] = tree_items
[depth
] ", \\\n\t{ 0 }";
231 level_count
[depth
]++;
235 for (i =
2; tree_items
[i
] != ""; i
++) {
236 level_count
[i
] += level_count
[i
-1];
239 for (i in subtree_depth
) {
240 subtree_offsets
[i
] += level_count
[subtree_depth
[i
]-1];
241 delete subtree_depth
[i
];
244 for (i =
1; tree_items
[i
] != ""; i
++) {
246 delete tree_items
[i
];
247 delete level_count
[i
];
251 return "#define " tree_identifier
"_INITIALIZER" s
;
254 # bucketsort(dst, src)
256 # Sort the elements of src by descending string length,
257 # placing them into dst[0] ... dst[n].
259 # Returns the number of elements.
260 function bucketsort
(dst
, src
, max
, count
, i
, t
)
262 # Note: ULTRIX 4.5 nawk does not support local array parameters
263 split("", bucketsort_buckets
);
267 if (i
> max
) { max = i
}
268 bucketsort_buckets
[i
]++
271 for (i = max
; i
> 0; i
--) {
272 if (i in bucketsort_buckets
) {
273 t = bucketsort_buckets
[i
]
274 bucketsort_buckets
[i
] = count
280 i =
length(t = src
[t
])
281 dst
[bucketsort_buckets
[i
]++] = t