3 # topgit_navigate - TopGit awk utility script used by tg--awksome
4 # Copyright (C) 2017,2019 Kyle J. McKay <mackyle@gmail.com>
10 # variable arguments (-v):
12 # brfile if non-empty, read TopGit branch names from here
13 # rmbr if true run system rm on brfile (after reading) if non-empty brfile
14 # anfile if non-empty, annihilated branch names are read from here
15 # rman if true run system rm on anfile (after reading) if non-empty anfile
16 # withan if true, mostly pretend anfile was empty (this is a convenience knob)
17 # exclbr whitespace separated list of names to exclude
18 # inclbr whitespace separated list of names to include
19 # startb starting branch name(s); may be empty (see below)
20 # pruneb zero or more (space separated) nodes to limit results to (see below)
21 # steps how many steps to move, negative means as many as possible
22 # chklps always check for loops even when not necessary
23 # rev if true move in reverse order (towards start/first rather than end/last)
24 # pin if true and steps is > 0 and runs off the end keep last visited node
25 # tgonly if true only nodes listed in brfile are output
26 # fldone output only the first field on each line
28 # NOTE: an integer "steps" value is REQUIRED
29 # NOTE: if "steps" is zero, a non-empty "startb" value is REQUIRED
31 # for inclbr and exclbr the "edge" referred to is any edge as provided on a
32 # line of the input stream (which is the output from run_awk_topgit_deps)
34 # if inclbr is non-empty only edges with at least one end listed in inclbr
35 # will be considered to be present
37 # if a branch name appears in exclbr any edge with either end listed in exclbr
38 # will be omitted trumping inclbr
40 # if a branch is listed in exclbr then all edges going to/from that branch
41 # are necessarily omitted which means effectively that any branch listed in
42 # exclbr has its .topdeps file treated as though it were empty
44 # input is a list of edges as output by run_awk_topgit_deps
46 # using the startb starting point the graph is walked forward (backward if rev)
47 # exactly steps times unless steps is negative in which case it's walked to the
48 # end (and pin is always implicitly true in that case)
50 # "forward" moves toward later patches whereas "backwards" moves towards earlier
51 # ones where earlier patches must be applied before later ones
53 # if startb is not empty and the starting node is excluded by !withan and/or
54 # tgonly, then if steps == 0 it's not output otherwise the first step will be
55 # to a non-excluded (by !withan || tgonly) node. Otherwise a single "step" is
56 # always from an included node to an included node. This allows starting from
57 # an annihilated or non-tgish node and navigating to a non-annihilated tgish
58 # node, for example. And then possibly continuing to step further.
60 # if pruneb is non-empty after splitting a loop check will be forced and then
61 # items are taken as positive refs (unless they are prefixed with "^") until
62 # a sole "^" which flips state so unprefixed refs are taken as negative (unless
63 # they are prefixed with "^") and so on -- very much like git rev-list except
64 # that an isolated "^" takes the place of "--not". However, if there are no
65 # positive refs found then all nodes in the input start out as positive.
66 # Then any positive refs are walked including all reachable nodes then any
67 # negative refs are walked excluding those so negative refs always trump
68 # positive refs just like git rev-list does; anything left over and not
69 # included acts like it was never there in the first place -- can't be visited
72 # note that inclbr/exclbr operations are performed prior to pruneb operations
73 # which means that if inclbr/exclbr ends up excluding a node, listing that
74 # node's name in pruneb will have absolutely no effect whatsoever
76 # if startb contains more than one branch name, the requested operation is
77 # performed for each one and the results combined in that order, but there will
78 # be no way to distinguish where the boundary between results for the different
79 # branches lies in the output, but since sometimes that doesn't matter it's a
80 # helpful mode to have. Note that duplicate output is suppressed so, for
81 # example, using startb="a a" will always produce exactly the same output as
84 # if steps is 0 and startb is empty it's an error otherwise startb just gets
85 # dumped right back out unless it's been excluded (and no loop checking is
86 # performed in that case either unless chklps is true) on one line together
87 # with the containing head(s); this provides "contains" functionality and is an
88 # obvious special case of the general navigation described below
90 # Consider this TopGit DAG for the next section:
92 # C # content of branch "C"'s .topdeps file:
96 # The linearized patch sequence is:
101 # The "head(s)" of the graph is just the single node C
102 # The "ending point(s)" of the patch sequence is just patch C
103 # The "leaves"/"roots" of the graph are the nodes A and B
104 # The "starting point(s)" of the patch sequence is just patch A
106 # N.B. While the "head(s)" of the graph *DO* correspond exactly to
107 # the "ending point(s)" of the patch sequence,
108 # the "leaves"/"roots" *DO NOT ALWAYS* correspond exactly to
109 # the "starting point(s)" of the patch sequence!
111 # if startb is empty then one step forward (!rev) moves to all the roots or
112 # "leaf" nodes whereas one step backwards (rev) moves to all the heads or
113 # "ending" nodes; an empty startb with a negative steps (all the way) and a
114 # forward (!rev) direction starts at the leaves and moves to the heads;
115 # an empty startb with a negative steps (all the way) and a reverse (rev)
116 # direction starts at the heads or "ending points" and moves to the
117 # "starting points" which MAY BE DIFFERENT THAN THE "leaves"!
119 # Note that one step (either forward or backward) off of an empty startb
120 # is optimized and does NOT walk the graph nor cause loop checking (unless
121 # chklps is true). Additionally a steps value of 1 or any negative value
122 # with an empty startb will only output one single field on each output
123 # line -- the head/ending point, root/leaf, or starting point.
125 # As a special case, since the output is identical, a negative steps (all
126 # the way) moving forward (!rev) from an empty startb gets treated exactly
127 # the same as a single reverse step from an empty startb and is therefore
128 # also optimized and does not cause loop checking (unless chklps is true).
130 # N.B. a negative steps (all the way) moving backward (rev) from an empty
131 # startb IS NOT OPTIMIZED because it IS NOT THE SAME (as it finds "starting
132 # points") as moving one step forward from an empty startb (as that finds
133 # "root/leaf" nodes) since the set of "root/leaf nodes" may differ from the
134 # set of "starting point nodes".
137 # === stepping from nil startb ===
140 # steps | rev | chklps | result
141 # ------|-----|--------|------------------------------------------------
142 # 1 | 0 | No | "roots"/"leaves" of TopGit DAG
143 # 1 | 1 | No | "heads"/"ending/final patches of patch series"
144 # -1 | 0 | No | "heads"/"ending/final patches of patch series"
145 # -1 | 1 | Yes | "starting/first patches of patch series"
147 # with all but the three special cases just mentioned (the three "No" rows
148 # in the table above), loop detection is performed first to avoid problems
149 # and will cause an exit status of EX_DATAERR (65) if loops are detected in
150 # which case no output is produced; the recommended way to check for loops is
151 # with an empty startb, steps=-1 and chklps=1 and redirecting output to
152 # /dev/null (or capturing the heads output on success) and then testing the
153 # exit status for an EX_DATAERR (65) result
155 # the effect of "navigation" is as though the heads containing startb are first
156 # determined (by walking "forward" as far as possible) then those heads are
157 # enumerated in postfix order (each node is only visited once though -- the
158 # first time it's encountered in the enumeration) forming one or more linear
159 # lists (this step is not possible if loops are present hence the loop
160 # detection check). Then the requested number of steps are taken from the
161 # location startb appears in each list and the possibly none, possibly many
162 # results are output one per line like so:
164 # <result_branch_name> <containing_topgit_head_branch_name>...
166 # there will always be at least one containing branch name even if it's the
167 # same as the result branch name (unless startb is empty and steps is negative
168 # or 1), but there could be more (space separated) if the branch is part of
169 # more than one patch series; if fldone is true then only the first field shown
170 # above (the result branch name) will be output on each line no matter what
173 BEGIN { exitcode =
""; stderr =
"exec cat>&2"; }
174 function exitnow
(e
) { exitcode=e
; exit e
}
175 END { if (exitcode
!= "") exit exitcode
}
178 if (steps !~
/^
-?
[0-9]+$
/) exitnow
(2)
181 cnt =
split(startb
, ascratch
, " ")
182 for (i =
1; i
<= cnt
; ++i
) {
183 if (!
(ascratch
[i
] in seen
)) {
184 starts
[++startcnt
] = ascratch
[i
]
185 seen
[ascratch
[i
]] =
1
189 if (!steps
) exitnow
(2)
190 if (steps
< 0 && !rev
) {
194 if (steps
< 0) fldone =
1
196 if (steps
< 0) steps =
-1
198 cnt =
split(inclbr
, scratch
, " ")
201 for (i =
1; i
<= cnt
; ++i
) incnames
[scratch
[i
]] =
1
203 cnt =
split(exclbr
, scratch
, " ")
204 for (i =
1; i
<= cnt
; ++i
) excnames
[scratch
[i
]] =
1
205 prunecnt =
split(pruneb
, prunes
, " ")
207 for (node in prunes
) if (node ==
"^") ++nots
208 if (nots
>= prunecnt
) prunecnt =
0
212 function quotevar
(v
) {
213 gsub(/\047/, "\047\\\047\047", v
)
214 return "\047" v
"\047"
219 if (rmbr
&& brfile
!= "") rmlist = rmlist
" " quotevar
(brfile
)
220 if (rman
&& anfile
!= "") rmlist = rmlist
" " quotevar
(anfile
)
222 system("rm -f" rmlist
)
231 function init
(abranch
, _e
) {
234 while ((_e =
(getline abranch
<brfile
)) > 0) {
235 if (abranch
!= "") tgish
[abranch
] =
1
238 if (_e
< 0) exitnow
(2)
242 while ((_e =
(getline abranch
<anfile
)) > 0) {
243 if (abranch
!= "") ann
[abranch
] =
1
246 if (_e
< 0) exitnow
(2)
251 function incledge
(b1
, b2
) {
252 return !
(b1 in excnames
) && !
(b2 in excnames
) &&
253 (!inconly
|| b1 in incnames
|| b2 in incnames
)
256 function wanted
(abranch
) {
257 return (withan
|| !
(abranch in ann
)) && (!tgonly
|| (abranch in tgish
))
262 function addlink
(anarray
, anode
, alink
, _linkstr
) {
263 if (alink ==
"") return
264 _linkstr = anarray
[anode
]
265 if (length(_linkstr
) < 3) {
266 _linkstr =
" " alink
" "
267 } else if (!
index(_linkstr
, " " alink
" ")) {
268 _linkstr = _linkstr alink
" "
270 anarray
[anode
] = _linkstr
273 NF ==
2 && $
1 != "" && $
2 != "" && incledge
($
1, $
2) {
274 if (!
($
2 in nodes
)) {
276 ordered
[++ordidx
] = $
2
278 if (!
($
1 in nodes
)) {
280 ordered
[++ordidx
] = $
1
282 if ($
1 != $
2 && !
($
1 in ann
)) {
283 addlink
(incoming
, $
2, $
1)
284 addlink
(outgoing
, $
1, $
2)
290 function checkloops
() {
291 for (edge in incoming
) curinc
[edge
] = incoming
[edge
]
292 for (node in edgenodes
) if (!
(node in curinc
)) curnodes
[node
] =
1
295 for (node in curnodes
) break
296 if (node ==
"") break
297 delete curnodes
[node
]
298 if ((node in outgoing
) && split(outgoing
[node
], links
, " ")) {
299 for (linki in links
) {
301 if (link in curinc
) {
302 inclist = curinc
[link
]
303 if ((idx =
index(inclist
, " " node
" "))) {
304 inclist =
substr(inclist
, 1, idx
) \
306 idx
+ length(node
) + 2)
307 if (length(inclist
) < 3) {
311 curinc
[link
] = inclist
317 delete edgenodes
[node
]
319 for (node in edgenodes
) exitnow
(65)
322 function collectstarts
(anarray
, _i
) {
323 for (_i =
1; _i
<= ordidx
; ++_i
) {
324 if ((ordered
[_i
] in nodes
) && !
(ordered
[_i
] in anarray
))
325 starts
[++startcnt
] = ordered
[_i
]
329 function marknodes
(anode
, val
, _outlinks
, _oneout
) {
330 if (!
(anode in nodes
)) return
331 if (nodes
[anode
] == val
) return
333 if (anode in outgoing
) {
334 split(outgoing
[anode
], _outlinks
, " ")
335 for (_oneout in _outlinks
) marknodes
(_outlinks
[_oneout
], val
)
339 function getheads_
(anode
, theheads
, seen
, headcnt
, _cnt
, _i
, _inlinks
) {
340 if (!
(anode in nodes
)) return
341 if (!
(anode in incoming
)) {
342 if (!
(anode in seen
)) {
344 theheads
[++headcnt
[0]] = anode
348 _cnt =
split(incoming
[anode
], _inlinks
, " ")
349 for (_i =
1; _i
<= _cnt
; ++_i
)
350 getheads_
(_inlinks
[_i
], theheads
, seen
, headcnt
)
353 function getheads
(anode
, theheads
, _seen
, _headcnt
) {
354 split("", theheads
, " ")
355 split("", _seen
, " ")
357 getheads_
(anode
, theheads
, _seen
, _headcnt
)
361 function getpath_
(anode
, pnodes
, arlinks
, _seen
, _pcnt
, _children
, _ccnt
, _i
) {
362 if (anode in _seen
) return
364 if (anode in arlinks
) {
365 _ccnt =
split(arlinks
[anode
], _children
, " ")
366 for (_i =
1; _i
<= _ccnt
; ++_i
)
367 getpath_
(_children
[_i
], pnodes
, arlinks
, _seen
, _pcnt
)
369 pnodes
[++_pcnt
[0]] = anode
372 function getpath
(anode
, pnodes
, arlinks
, _seen
, _pcnt
, _z
) {
373 split("", pnodes
, " ");
374 split("", _seen
, " ")
376 getpath_
(anode
, pnodes
, arlinks
, _seen
, _pcnt
)
378 printf "%s", "PATH " anode
" |"
379 for (_z =
1; _z
<= _pcnt
[0]; ++_z
) printf " %s" pnodes
[_z
]
384 if (chklps
|| startcnt
|| prunecnt
|| steps
!= 1) checkloops
()
387 for (i =
1; i
<= prunecnt
; ++ i
) {
393 if (substr(onep
, 1, 1) ==
"^") {
394 if (state
) negnodes
[substr(onep
, 2)] =
1
395 else poznodes
[substr(onep
, 2)] =
1
397 if (state
) poznodes
[onep
] =
1
398 else negnodes
[onep
] =
1
401 for (onep in poznodes
) {
402 for (anode in nodes
) nodes
[anode
] =
""
405 for (onep in poznodes
) marknodes
(onep
, 1)
406 for (onep in negnodes
) marknodes
(onep
, 0)
407 for (onep in nodes
) if (nodes
[onep
]) tmpa
[onep
] =
1
408 split("", nodes
, " ")
409 for (onep in tmpa
) if (tmpa
[onep
]) nodes
[onep
] =
1
411 for (onep in outgoing
) tmpa
[onep
] = outgoing
[onep
]
412 split("", incoming
, " ")
413 split("", outgoing
, " ")
414 for (onep in nodes
) {
416 lcnt =
split(tmpa
[onep
], links
, " ")
417 for (i =
1; i
<= lcnt
; ++i
) {
420 addlink
(incoming
, dest
, onep
)
421 addlink
(outgoing
, onep
, dest
)
428 if (steps
< 0 && !rev
) {
429 print "internal error: non-optimized steps=-1 rev=0" |stderr
430 exitnow
(70) # EX_SOFTWARE
433 collectstarts
(incoming
)
435 collectstarts
(outgoing
)
436 if (steps
> 0) --steps
437 if (steps ==
0 || !startcnt
) {
438 for (i =
1; i
<= startcnt
; ++i
)
439 if (wanted
(starts
[i
])) print starts
[i
]
444 for (i =
1; i
<= startcnt
; ++i
) {
445 headcnt = getheads
(starts
[i
], heads
)
446 for (j =
1; j
<= headcnt
; ++j
) {
447 pathcnt = getpath
(heads
[j
], path
, outgoing
)
448 for (pathidx =
1; pathidx
<= pathcnt
; ++pathidx
)
449 if (path
[pathidx
] == starts
[i
]) break
450 if (pathidx
> pathcnt
) continue
452 if (!wanted
(path
[pathidx
])) {
454 if (steps
> 0) --adjsteps
457 while (pathidx
>=
1 && pathidx
<= pathcnt
&&
458 !wanted
(path
[pathidx
]))
460 if (pathidx
>=
1 && pathidx
<= pathcnt
) {
462 newstart = path
[pathidx
]
465 for (k =
1; k
<= oldcnt
; ++k
) {
466 if (wanted
(path
[k
])) {
467 path
[++pathcnt
] = path
[k
]
468 if (!pathidx
&& path
[pathcnt
] == newstart
)
473 print "internal error: wanted disappeared" |stderr
474 exitnow
(70) # EX_SOFTWARE
478 pathidx = rev ?
1 : pathcnt
480 if (rev
) pathidx
-= adjsteps
481 else pathidx
+= adjsteps
484 if (pathidx
< 1) pathidx =
1
485 else if (pathidx
> pathcnt
) pathidx = pathcnt
487 if (pathidx
< 1 || pathidx
> pathcnt
) continue
488 aresult = path
[pathidx
]
489 if (aresult in seenresults
) {
490 resultidx = seenresults
[aresult
]
491 resultlist = results
[resultidx
]
493 resultidx =
++resultcnt
494 resultnames
[resultidx
] = aresult
495 seenresults
[aresult
] = resultidx
498 if (!
index(resultlist
, " " heads
[j
] " ")) {
499 resultlist = resultlist heads
[j
] " "
500 results
[resultidx
] = resultlist
504 for (i =
1; i
<= resultcnt
; ++i
) {
505 if (fldone
) print resultnames
[i
]
506 else print resultnames
[i
] substr(results
[i
], 1, length(results
[i
]) - 1)