README_DOCS.rst: wordsmith some of the commentary
[topgit/pro.git] / awk / topgit_navigate.awk
blob7827a0caa90b856cbab3fc68537351de54f2600f
1 #!/usr/bin/awk -f
3 # topgit_navigate - TopGit awk utility script used by tg--awksome
4 # Copyright (C) 2017,2019 Kyle J. McKay <mackyle@gmail.com>
5 # All rights reserved.
6 # License GPLv2
8 # topgit_navigate
10 # variable arguments (-v):
12 # brfile if non-empty, read TopGit branch names from here
13 # rmbr if true run system rm on brfile (after reading) if non-empty brfile
14 # anfile if non-empty, annihilated branch names are read from here
15 # rman if true run system rm on anfile (after reading) if non-empty anfile
16 # withan if true, mostly pretend anfile was empty (this is a convenience knob)
17 # exclbr whitespace separated list of names to exclude
18 # inclbr whitespace separated list of names to include
19 # startb starting branch name(s); may be empty (see below)
20 # pruneb zero or more (space separated) nodes to limit results to (see below)
21 # steps how many steps to move, negative means as many as possible
22 # chklps always check for loops even when not necessary
23 # rev if true move in reverse order (towards start/first rather than end/last)
24 # pin if true and steps is > 0 and runs off the end keep last visited node
25 # tgonly if true only nodes listed in brfile are output
26 # fldone output only the first field on each line
28 # NOTE: an integer "steps" value is REQUIRED
29 # NOTE: if "steps" is zero, a non-empty "startb" value is REQUIRED
31 # for inclbr and exclbr the "edge" referred to is any edge as provided on a
32 # line of the input stream (which is the output from run_awk_topgit_deps)
34 # if inclbr is non-empty only edges with at least one end listed in inclbr
35 # will be considered to be present
37 # if a branch name appears in exclbr any edge with either end listed in exclbr
38 # will be omitted trumping inclbr
40 # if a branch is listed in exclbr then all edges going to/from that branch
41 # are necessarily omitted which means effectively that any branch listed in
42 # exclbr has its .topdeps file treated as though it were empty
44 # input is a list of edges as output by run_awk_topgit_deps
46 # using the startb starting point the graph is walked forward (backward if rev)
47 # exactly steps times unless steps is negative in which case it's walked to the
48 # end (and pin is always implicitly true in that case)
50 # "forward" moves toward later patches whereas "backwards" moves towards earlier
51 # ones where earlier patches must be applied before later ones
53 # if startb is not empty and the starting node is excluded by !withan and/or
54 # tgonly, then if steps == 0 it's not output otherwise the first step will be
55 # to a non-excluded (by !withan || tgonly) node. Otherwise a single "step" is
56 # always from an included node to an included node. This allows starting from
57 # an annihilated or non-tgish node and navigating to a non-annihilated tgish
58 # node, for example. And then possibly continuing to step further.
60 # if pruneb is non-empty after splitting a loop check will be forced and then
61 # items are taken as positive refs (unless they are prefixed with "^") until
62 # a sole "^" which flips state so unprefixed refs are taken as negative (unless
63 # they are prefixed with "^") and so on -- very much like git rev-list except
64 # that an isolated "^" takes the place of "--not". However, if there are no
65 # positive refs found then all nodes in the input start out as positive.
66 # Then any positive refs are walked including all reachable nodes then any
67 # negative refs are walked excluding those so negative refs always trump
68 # positive refs just like git rev-list does; anything left over and not
69 # included acts like it was never there in the first place -- can't be visited
70 # or traversed.
72 # note that inclbr/exclbr operations are performed prior to pruneb operations
73 # which means that if inclbr/exclbr ends up excluding a node, listing that
74 # node's name in pruneb will have absolutely no effect whatsoever
76 # if startb contains more than one branch name, the requested operation is
77 # performed for each one and the results combined in that order, but there will
78 # be no way to distinguish where the boundary between results for the different
79 # branches lies in the output, but since sometimes that doesn't matter it's a
80 # helpful mode to have. Note that duplicate output is suppressed so, for
81 # example, using startb="a a" will always produce exactly the same output as
82 # just startb="a".
84 # if steps is 0 and startb is empty it's an error otherwise startb just gets
85 # dumped right back out unless it's been excluded (and no loop checking is
86 # performed in that case either unless chklps is true) on one line together
87 # with the containing head(s); this provides "contains" functionality and is an
88 # obvious special case of the general navigation described below
90 # Consider this TopGit DAG for the next section:
92 # C # content of branch "C"'s .topdeps file:
93 # / \ A
94 # A B B
96 # The linearized patch sequence is:
97 # patch A
98 # patch B
99 # patch C
101 # The "head(s)" of the graph is just the single node C
102 # The "ending point(s)" of the patch sequence is just patch C
103 # The "leaves"/"roots" of the graph are the nodes A and B
104 # The "starting point(s)" of the patch sequence is just patch A
106 # N.B. While the "head(s)" of the graph *DO* correspond exactly to
107 # the "ending point(s)" of the patch sequence,
108 # the "leaves"/"roots" *DO NOT ALWAYS* correspond exactly to
109 # the "starting point(s)" of the patch sequence!
111 # if startb is empty then one step forward (!rev) moves to all the roots or
112 # "leaf" nodes whereas one step backwards (rev) moves to all the heads or
113 # "ending" nodes; an empty startb with a negative steps (all the way) and a
114 # forward (!rev) direction starts at the leaves and moves to the heads;
115 # an empty startb with a negative steps (all the way) and a reverse (rev)
116 # direction starts at the heads or "ending points" and moves to the
117 # "starting points" which MAY BE DIFFERENT THAN THE "leaves"!
119 # Note that one step (either forward or backward) off of an empty startb
120 # is optimized and does NOT walk the graph nor cause loop checking (unless
121 # chklps is true). Additionally a steps value of 1 or any negative value
122 # with an empty startb will only output one single field on each output
123 # line -- the head/ending point, root/leaf, or starting point.
125 # As a special case, since the output is identical, a negative steps (all
126 # the way) moving forward (!rev) from an empty startb gets treated exactly
127 # the same as a single reverse step from an empty startb and is therefore
128 # also optimized and does not cause loop checking (unless chklps is true).
130 # N.B. a negative steps (all the way) moving backward (rev) from an empty
131 # startb IS NOT OPTIMIZED because it IS NOT THE SAME (as it finds "starting
132 # points") as moving one step forward from an empty startb (as that finds
133 # "root/leaf" nodes) since the set of "root/leaf nodes" may differ from the
134 # set of "starting point nodes".
137 # === stepping from nil startb ===
139 # | forced |
140 # steps | rev | chklps | result
141 # ------|-----|--------|------------------------------------------------
142 # 1 | 0 | No | "roots"/"leaves" of TopGit DAG
143 # 1 | 1 | No | "heads"/"ending/final patches of patch series"
144 # -1 | 0 | No | "heads"/"ending/final patches of patch series"
145 # -1 | 1 | Yes | "starting/first patches of patch series"
147 # with all but the three special cases just mentioned (the three "No" rows
148 # in the table above), loop detection is performed first to avoid problems
149 # and will cause an exit status of EX_DATAERR (65) if loops are detected in
150 # which case no output is produced; the recommended way to check for loops is
151 # with an empty startb, steps=-1 and chklps=1 and redirecting output to
152 # /dev/null (or capturing the heads output on success) and then testing the
153 # exit status for an EX_DATAERR (65) result
155 # the effect of "navigation" is as though the heads containing startb are first
156 # determined (by walking "forward" as far as possible) then those heads are
157 # enumerated in postfix order (each node is only visited once though -- the
158 # first time it's encountered in the enumeration) forming one or more linear
159 # lists (this step is not possible if loops are present hence the loop
160 # detection check). Then the requested number of steps are taken from the
161 # location startb appears in each list and the possibly none, possibly many
162 # results are output one per line like so:
164 # <result_branch_name> <containing_topgit_head_branch_name>...
166 # there will always be at least one containing branch name even if it's the
167 # same as the result branch name (unless startb is empty and steps is negative
168 # or 1), but there could be more (space separated) if the branch is part of
169 # more than one patch series; if fldone is true then only the first field shown
170 # above (the result branch name) will be output on each line no matter what
173 BEGIN { exitcode = ""; stderr = "exec cat>&2"; }
174 function exitnow(e) { exitcode=e; exit e }
175 END { if (exitcode != "") exit exitcode }
177 BEGIN {
178 if (steps !~ /^-?[0-9]+$/) exitnow(2)
179 steps = 0 + steps
180 startcnt = 0
181 cnt = split(startb, ascratch, " ")
182 for (i = 1; i <= cnt; ++i) {
183 if (!(ascratch[i] in seen)) {
184 starts[++startcnt] = ascratch[i]
185 seen[ascratch[i]] = 1
188 if (!startcnt) {
189 if (!steps) exitnow(2)
190 if (steps < 0 && !rev) {
191 rev = 1
192 steps = 1
194 if (steps < 0) fldone = 1
196 if (steps < 0) steps = -1
197 inconly = 0
198 cnt = split(inclbr, scratch, " ")
199 if (cnt) {
200 inconly = 1
201 for (i = 1; i <= cnt; ++i) incnames[scratch[i]] = 1
203 cnt = split(exclbr, scratch, " ")
204 for (i = 1; i <= cnt; ++i) excnames[scratch[i]] = 1
205 prunecnt = split(pruneb, prunes, " ")
206 nots = 0
207 for (node in prunes) if (node == "^") ++nots
208 if (nots >= prunecnt) prunecnt = 0
209 ordidx = 0
212 function quotevar(v) {
213 gsub(/\047/, "\047\\\047\047", v)
214 return "\047" v "\047"
217 function rmfiles() {
218 rmlist = ""
219 if (rmbr && brfile != "") rmlist = rmlist " " quotevar(brfile)
220 if (rman && anfile != "") rmlist = rmlist " " quotevar(anfile)
221 if (rmlist != "") {
222 system("rm -f" rmlist)
223 rmbr = 0
224 brfile = ""
225 rman = 0
226 anfile = ""
229 END { rmfiles() }
231 function init(abranch, _e) {
232 if (brfile != "") {
233 if (tgonly) {
234 while ((_e = (getline abranch <brfile)) > 0) {
235 if (abranch != "") tgish[abranch] = 1
237 close(brfile)
238 if (_e < 0) exitnow(2)
241 if (anfile != "") {
242 while ((_e = (getline abranch <anfile)) > 0) {
243 if (abranch != "") ann[abranch] = 1
245 close(anfile)
246 if (_e < 0) exitnow(2)
248 rmfiles()
251 function incledge(b1, b2) {
252 return !(b1 in excnames) && !(b2 in excnames) &&
253 (!inconly || b1 in incnames || b2 in incnames)
256 function wanted(abranch) {
257 return (withan || !(abranch in ann)) && (!tgonly || (abranch in tgish))
260 NR == 1 {init()}
262 function addlink(anarray, anode, alink, _linkstr) {
263 if (alink == "") return
264 _linkstr = anarray[anode]
265 if (length(_linkstr) < 3) {
266 _linkstr = " " alink " "
267 } else if (!index(_linkstr, " " alink " ")) {
268 _linkstr = _linkstr alink " "
270 anarray[anode] = _linkstr
273 NF == 2 && $1 != "" && $2 != "" && incledge($1, $2) {
274 if (!($2 in nodes)) {
275 nodes[$2] = 1
276 ordered[++ordidx] = $2
278 if (!($1 in nodes)) {
279 nodes[$1] = 1
280 ordered[++ordidx] = $1
282 if ($1 != $2 && !($1 in ann)) {
283 addlink(incoming, $2, $1)
284 addlink(outgoing, $1, $2)
285 edgenodes[$1] = 1
286 edgenodes[$2] = 1
290 function checkloops() {
291 for (edge in incoming) curinc[edge] = incoming[edge]
292 for (node in edgenodes) if (!(node in curinc)) curnodes[node] = 1
293 for (;;) {
294 node = ""
295 for (node in curnodes) break
296 if (node == "") break
297 delete curnodes[node]
298 if ((node in outgoing) && split(outgoing[node], links, " ")) {
299 for (linki in links) {
300 link = links[linki]
301 if (link in curinc) {
302 inclist = curinc[link]
303 if ((idx = index(inclist, " " node " "))) {
304 inclist = substr(inclist, 1, idx) \
305 substr(inclist,
306 idx + length(node) + 2)
307 if (length(inclist) < 3) {
308 delete curinc[link]
309 curnodes[link] = 1
310 } else {
311 curinc[link] = inclist
317 delete edgenodes[node]
319 for (node in edgenodes) exitnow(65)
322 function collectstarts(anarray, _i) {
323 for (_i = 1; _i <= ordidx; ++_i) {
324 if ((ordered[_i] in nodes) && !(ordered[_i] in anarray))
325 starts[++startcnt] = ordered[_i]
329 function marknodes(anode, val, _outlinks, _oneout) {
330 if (!(anode in nodes)) return
331 if (nodes[anode] == val) return
332 nodes[anode] = val
333 if (anode in outgoing) {
334 split(outgoing[anode], _outlinks, " ")
335 for (_oneout in _outlinks) marknodes(_outlinks[_oneout], val)
339 function getheads_(anode, theheads, seen, headcnt, _cnt, _i, _inlinks) {
340 if (!(anode in nodes)) return
341 if (!(anode in incoming)) {
342 if (!(anode in seen)) {
343 seen[anode] = 1
344 theheads[++headcnt[0]] = anode
346 return
348 _cnt = split(incoming[anode], _inlinks, " ")
349 for (_i = 1; _i <= _cnt; ++_i)
350 getheads_(_inlinks[_i], theheads, seen, headcnt)
353 function getheads(anode, theheads, _seen, _headcnt) {
354 split("", theheads, " ")
355 split("", _seen, " ")
356 _headcnt[0] = 0
357 getheads_(anode, theheads, _seen, _headcnt)
358 return _headcnt[0]
361 function getpath_(anode, pnodes, arlinks, _seen, _pcnt, _children, _ccnt, _i) {
362 if (anode in _seen) return
363 _seen[anode] = 1
364 if (anode in arlinks) {
365 _ccnt = split(arlinks[anode], _children, " ")
366 for (_i = 1; _i <= _ccnt; ++_i)
367 getpath_(_children[_i], pnodes, arlinks, _seen, _pcnt)
369 pnodes[++_pcnt[0]] = anode
372 function getpath(anode, pnodes, arlinks, _seen, _pcnt, _z) {
373 split("", pnodes, " ");
374 split("", _seen, " ")
375 _pcnt[0] = 0
376 getpath_(anode, pnodes, arlinks, _seen, _pcnt)
377 return _pcnt[0]
378 printf "%s", "PATH " anode " |"
379 for (_z = 1; _z <= _pcnt[0]; ++_z) printf " %s" pnodes[_z]
380 printf "\n"
383 END {
384 if (chklps || startcnt || prunecnt || steps != 1) checkloops()
385 if (prunecnt) {
386 state = 1
387 for (i = 1; i <= prunecnt; ++ i) {
388 onep = prunes[i]
389 if (onep == "^") {
390 state = 1 - state
391 continue
393 if (substr(onep, 1, 1) == "^") {
394 if (state) negnodes[substr(onep, 2)] = 1
395 else poznodes[substr(onep, 2)] = 1
396 } else {
397 if (state) poznodes[onep] = 1
398 else negnodes[onep] = 1
401 for (onep in poznodes) {
402 for (anode in nodes) nodes[anode] = ""
403 break
405 for (onep in poznodes) marknodes(onep, 1)
406 for (onep in negnodes) marknodes(onep, 0)
407 for (onep in nodes) if (nodes[onep]) tmpa[onep] = 1
408 split("", nodes, " ")
409 for (onep in tmpa) if (tmpa[onep]) nodes[onep] = 1
410 split("", tmpa, " ")
411 for (onep in outgoing) tmpa[onep] = outgoing[onep]
412 split("", incoming, " ")
413 split("", outgoing, " ")
414 for (onep in nodes) {
415 if (onep in tmpa) {
416 lcnt = split(tmpa[onep], links, " ")
417 for (i = 1; i <= lcnt; ++i) {
418 dest = links[i]
419 if (dest in nodes) {
420 addlink(incoming, dest, onep)
421 addlink(outgoing, onep, dest)
427 if (!startcnt) {
428 if (steps < 0 && !rev) {
429 print "internal error: non-optimized steps=-1 rev=0" |stderr
430 exitnow(70) # EX_SOFTWARE
432 if (rev)
433 collectstarts(incoming)
434 else
435 collectstarts(outgoing)
436 if (steps > 0) --steps
437 if (steps == 0 || !startcnt) {
438 for (i = 1; i <= startcnt; ++i)
439 if (wanted(starts[i])) print starts[i]
440 exit 0
443 resultcnt = 0
444 for (i = 1; i <= startcnt; ++i) {
445 headcnt = getheads(starts[i], heads)
446 for (j = 1; j <= headcnt; ++j) {
447 pathcnt = getpath(heads[j], path, outgoing)
448 for (pathidx = 1; pathidx <= pathcnt; ++pathidx)
449 if (path[pathidx] == starts[i]) break
450 if (pathidx > pathcnt) continue
451 adjsteps = steps
452 if (!wanted(path[pathidx])) {
453 if (!steps) continue
454 if (steps > 0) --adjsteps
455 incr = rev ? -1 : 1
456 do pathidx += incr
457 while (pathidx >= 1 && pathidx <= pathcnt &&
458 !wanted(path[pathidx]))
460 if (pathidx >= 1 && pathidx <= pathcnt) {
461 oldcnt = pathcnt
462 newstart = path[pathidx]
463 pathidx = 0
464 pathcnt = 0
465 for (k = 1; k <= oldcnt; ++k) {
466 if (wanted(path[k])) {
467 path[++pathcnt] = path[k]
468 if (!pathidx && path[pathcnt] == newstart)
469 pathidx = pathcnt
472 if (!pathidx) {
473 print "internal error: wanted disappeared" |stderr
474 exitnow(70) # EX_SOFTWARE
477 if (steps < 0) {
478 pathidx = rev ? 1 : pathcnt
479 } else {
480 if (rev) pathidx -= adjsteps
481 else pathidx += adjsteps
483 if (pin) {
484 if (pathidx < 1) pathidx = 1
485 else if (pathidx > pathcnt) pathidx = pathcnt
487 if (pathidx < 1 || pathidx > pathcnt) continue
488 aresult = path[pathidx]
489 if (aresult in seenresults) {
490 resultidx = seenresults[aresult]
491 resultlist = results[resultidx]
492 } else {
493 resultidx = ++resultcnt
494 resultnames[resultidx] = aresult
495 seenresults[aresult] = resultidx
496 resultlist = " "
498 if (!index(resultlist, " " heads[j] " ")) {
499 resultlist = resultlist heads[j] " "
500 results[resultidx] = resultlist
504 for (i = 1; i <= resultcnt; ++i) {
505 if (fldone) print resultnames[i]
506 else print resultnames[i] substr(results[i], 1, length(results[i]) - 1)