3 # Copyright © 2021-2022 Nick Bowler
5 # Generate one or more C array initializers to encode simple tree structures
8 # Each nonempty line of the input file is either a comment, an option
9 # specification or a tree specification. Each line is distinguished by
10 # its first character. A # character introduces a comment, an @ character
11 # introduces an option specification (described later), and all other
12 # nonempty lines are tree nodes.
14 # The first field of a tree specification must be a valid C identifier,
15 # optionally followed by a comma. The identifiers used on non-leaf nodes
16 # must be unique with respect to other non-leaf nodes, but leaf nodes may
17 # share the same identifier as other nodes.
19 # The tree structure is expressed through indentation. Lines with no leading
20 # whitespace designate root nodes. When the number of white-space characters
21 # at the beginning of a line is greater than that of the previous line, this
22 # introduces a new subtree rooted at the previous node. When the number of
23 # leading white-space characters is less than the previous line, this concludes
24 # the definition of the current subtree.
26 # For example, the input on the left describes the tree on the right.
28 # INPUT: root TREE: root -- a -- b -- c
37 # For each root node X, the object-like macro X_INITIALIZER is defined. This
38 # can be used to initialize an array with static storage duration. Each line
39 # of the input is defines the initializer for one array element. These are
40 # then ordered topologically beginning with the root, but the root itself is
41 # not included. Each subtree is terminated by an "empty" element (with an
42 # initializer of {0}).
44 # If there is a comma after the identifier which begins a tree element, or
45 # if the identifier is the only text on the line, then the element initializer
46 # is constructed by surrounding the entire line by braces. Otherwise, the
47 # identifier is removed and the remainder of the line is surrounded by braces
48 # to construct the initializer. This allows the exact form of the initializer
51 # For example, the above input will produce this (or an equivalent topological
52 # ordering) initializer:
54 # OUTPUT: #define root_INITIALIZER \
55 # {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0}
57 # Constants representing the array offset for each subtree are additionally
58 # defined. For a non-leaf node X, the enumeration constant X_OFFSET gives the
59 # index of the first node rooted at X. With the above example, the results
62 # OUTPUT: enum { a_OFFSET = 3, b_OFFSET = 9, e_OFFSET = 6 };
64 # By using an array of structures to represent the tree and including these
65 # constants in initializers, it is possible for the flattened tree to describe
68 # Finally, each node identifier is defined as an enumeration constant
69 # representing the nonzero offset into a string table tree_strtab. These
70 # constants will normally be in scope when using the root_INITIALIZER macro,
71 # which means the resulting array initializers will use them.
73 # Options may be used to alter the normal behaviour. They may be placed
74 # anywhere in the input file. The following options are defined:
77 # Do not output a definition for tree_strtab or its associated
78 # enumeration constants.
80 # License WTFPL2: Do What The Fuck You Want To Public License, version 2.
81 # This is free software: you are free to do what the fuck you want to.
82 # There is NO WARRANTY, to the extent permitted by law.
87 print " * Automatically generated by gen-tree.awk from " FILENAME
89 print " * Automatically generated by gen-tree.awk"
91 print " * Do not edit."
96 # Check if "\\\\" in substitutions gives just one backslash.
97 bs =
"x"; sub(/x
/, "\\\\", bs
);
98 bs =
(length(bs
) ==
1 ?
"\\\\" : "\\");
102 depth = max_depth =
0;
120 val = !
sub(/^no_?
/, "", $
1);
124 print "error: unrecognized option: @" orig
| "cat 1>&2"
131 { indent =
index($
0, $
1) - 1 }
134 if (indent
> indent_stack
[depth
]) {
135 indent_stack
[++depth
] = indent
;
137 subtree_offsets
[entry_name
] = level_count
[depth
];
138 subtree_depth
[entry_name
] = depth
;
141 while (indent
< indent_stack
[depth
]) {
142 tree_items
[depth
] = tree_items
[depth
] ", \\\n\t{ 0 }";
143 level_count
[depth
]++;
148 entry_name = $
1; sub(/,$
/, "", entry_name
);
149 all_items
[num_entries
++] = entry_name
;
151 # Construct the element initializer for this tree node.
154 # Check if entry name is followed by a comma. If it is not, the entry
155 # name is excluded from the initializer.
157 gsub(/[ \t]/, "", check_str
);
158 if (index(check_str
, entry_name
",") ==
0) {
162 $
1 =
", \\\n\t{" $
1; $
NF = $
NF " }";
164 tree_items
[depth
] = tree_items
[depth
] $
0;
165 level_count
[depth
]++;
168 indent ==
0 && tree_identifier
!= "" {
169 trees
[tree_identifier
] = format_items
();
170 tree_identifier =
"";
173 if (tree_identifier
!= "")
174 trees
[tree_identifier
] = format_items
()
176 indent ==
0 { tree_identifier = $
1 }
179 prefix =
"\nenum {\n";
181 if (opts
["strtab"]) {
183 bucketsort
(sorted_items
, all_items
);
184 for (i =
0; i
< num_entries
; i
++) {
186 if ((n =
index(entry_strtab
, s
"\1")) > 0) {
187 entry_offsets
[s
] = n
-1;
189 entry_offsets
[s
] =
length(entry_strtab
);
190 entry_strtab = entry_strtab s
"\1";
194 gsub("\1", "\"\n\t\"" bs
"0\" \"", entry_strtab
);
195 sub(/^
"/, "", entry_strtab);
196 sub(/\n[^\n]*$/, ";", entry_strtab);
197 print "\nstatic const char tree_strtab
[] =
" entry_strtab
199 for (item in entry_offsets) {
200 printf "%s
\t%s = %d
", prefix, item, entry_offsets[item];
205 for (i in subtree_offsets) {
206 printf "%s
\t%s_OFFSET = %d
", prefix, i, subtree_offsets[i];
209 if (!index(prefix, "enum
")) {
215 for (tree in trees) {
216 print "\n" trees[tree];
220 function format_items(s, i)
223 tree_items[depth] = tree_items[depth] ", \\\n\t{ 0 }";
224 level_count[depth]++;
228 for (i = 2; tree_items[i] != ""; i++) {
229 level_count[i] += level_count[i-1];
232 for (i in subtree_depth) {
233 subtree_offsets[i] += level_count[subtree_depth[i]-1];
234 delete subtree_depth[i];
237 for (i = 1; tree_items[i] != ""; i++) {
239 delete tree_items[i];
240 delete level_count[i];
244 return "#define " tree_identifier "_INITIALIZER" s;
247 # bucketsort(dst, src)
249 # Sort the elements of src by descending string length,
250 # placing them into dst[0] ... dst[n].
252 # Returns the number of elements.
253 function bucketsort
(dst
, src
, max
, count
, i
, t
)
255 # Note: ULTRIX 4.5 nawk does not support local array parameters
256 split("", bucketsort_buckets
);
260 if (i
> max
) { max = i
}
261 bucketsort_buckets
[i
]++
264 for (i = max
; i
> 0; i
--) {
265 if (i in bucketsort_buckets
) {
266 t = bucketsort_buckets
[i
]
267 bucketsort_buckets
[i
] = count
273 i =
length(t = src
[t
])
274 dst
[bucketsort_buckets
[i
]++] = t