awk/topgit_navigate.awk

   1 #!/usr/bin/awk -f
   2
   3 # topgit_navigate - TopGit awk utility script used by tg--awksome
   4 # Copyright (C) 2017,2019 Kyle J. McKay <mackyle@gmail.com>
   5 # All rights reserved.
   6 # License GPLv2
   7
   8 # topgit_navigate
   9 #
  10 # variable arguments (-v):
  11 #
  12 #   brfile  if non-empty, read TopGit branch names from here
  13 #   rmbr    if true run system rm on brfile (after reading) if non-empty brfile
  14 #   anfile  if non-empty, annihilated branch names are read from here
  15 #   rman    if true run system rm on anfile (after reading) if non-empty anfile
  16 #   withan  if true, mostly pretend anfile was empty (this is a convenience knob)
  17 #   exclbr  whitespace separated list of names to exclude
  18 #   inclbr  whitespace separated list of names to include
  19 #   startb  starting branch name(s); may be empty (see below)
  20 #   pruneb  zero or more (space separated) nodes to limit results to (see below)
  21 #   steps   how many steps to move, negative means as many as possible
  22 #   chklps  always check for loops even when not necessary
  23 #   rev     if true move in reverse order (towards start/first rather than end/last)
  24 #   pin     if true and steps is > 0 and runs off the end keep last visited node
  25 #   tgonly  if true only nodes listed in brfile are output
  26 #   fldone  output only the first field on each line
  27 #
  28 # NOTE: an integer "steps" value is REQUIRED
  29 # NOTE: if "steps" is zero, a non-empty "startb" value is REQUIRED
  30 #
  31 # for inclbr and exclbr the "edge" referred to is any edge as provided on a
  32 # line of the input stream (which is the output from run_awk_topgit_deps)
  33 #
  34 # if inclbr is non-empty only edges with at least one end listed in inclbr
  35 # will be considered to be present
  36 #
  37 # if a branch name appears in exclbr any edge with either end listed in exclbr
  38 # will be omitted trumping inclbr
  39 #
  40 # if a branch is listed in exclbr then all edges going to/from that branch
  41 # are necessarily omitted which means effectively that any branch listed in
  42 # exclbr has its .topdeps file treated as though it were empty
  43 #
  44 # input is a list of edges as output by run_awk_topgit_deps
  45 #
  46 # using the startb starting point the graph is walked forward (backward if rev)
  47 # exactly steps times unless steps is negative in which case it's walked to the
  48 # end (and pin is always implicitly true in that case)
  49 #
  50 # "forward" moves toward later patches whereas "backwards" moves towards earlier
  51 # ones where earlier patches must be applied before later ones
  52 #
  53 # if startb is not empty and the starting node is excluded by !withan and/or
  54 # tgonly, then if steps == 0 it's not output otherwise the first step will be
  55 # to a non-excluded (by !withan || tgonly) node.  Otherwise a single "step" is
  56 # always from an included node to an included node.  This allows starting from
  57 # an annihilated or non-tgish node and navigating to a non-annihilated tgish
  58 # node, for example.  And then possibly continuing to step further.
  59 #
  60 # if pruneb is non-empty after splitting a loop check will be forced and then
  61 # items are taken as positive refs (unless they are prefixed with "^") until
  62 # a sole "^" which flips state so unprefixed refs are taken as negative (unless
  63 # they are prefixed with "^") and so on -- very much like git rev-list except
  64 # that an isolated "^" takes the place of "--not".  However, if there are no
  65 # positive refs found then all nodes in the input start out as positive.
  66 # Then any positive refs are walked including all reachable nodes then any
  67 # negative refs are walked excluding those so negative refs always trump
  68 # positive refs just like git rev-list does; anything left over and not
  69 # included acts like it was never there in the first place -- can't be visited
  70 # or traversed.
  71 #
  72 # note that inclbr/exclbr operations are performed prior to pruneb operations
  73 # which means that if inclbr/exclbr ends up excluding a node, listing that
  74 # node's name in pruneb will have absolutely no effect whatsoever
  75 #
  76 # if startb contains more than one branch name, the requested operation is
  77 # performed for each one and the results combined in that order, but there will
  78 # be no way to distinguish where the boundary between results for the different
  79 # branches lies in the output, but since sometimes that doesn't matter it's a
  80 # helpful mode to have.  Note that duplicate output is suppressed so, for
  81 # example, using startb="a a" will always produce exactly the same output as
  82 # just startb="a".
  83 #
  84 # if steps is 0 and startb is empty it's an error otherwise startb just gets
  85 # dumped right back out unless it's been excluded (and no loop checking is
  86 # performed in that case either unless chklps is true) on one line together
  87 # with the containing head(s); this provides "contains" functionality and is an
  88 # obvious special case of the general navigation described below
  89 #
  90 # Consider this TopGit DAG for the next section:
  91 #
  92 #      C       # content of branch "C"'s .topdeps file:
  93 #     / \      A
  94 #    A   B     B
  95 #
  96 # The linearized patch sequence is:
  97 #   patch A
  98 #   patch B
  99 #   patch C
 100 #
 101 # The "head(s)" of the graph is just the single node C
 102 # The "ending point(s)" of the patch sequence is just patch C
 103 # The "leaves"/"roots" of the graph are the nodes A and B
 104 # The "starting point(s)" of the patch sequence is just patch A
 105 #
 106 # N.B. While the "head(s)" of the graph *DO* correspond exactly to
 107 #      the "ending point(s)" of the patch sequence,
 108 #      the "leaves"/"roots" *DO NOT ALWAYS* correspond exactly to
 109 #      the "starting point(s)" of the patch sequence!
 110 #
 111 # if startb is empty then one step forward (!rev) moves to all the roots or
 112 # "leaf" nodes whereas one step backwards (rev) moves to all the heads or
 113 # "ending" nodes; an empty startb with a negative steps (all the way) and a
 114 # forward (!rev) direction starts at the leaves and moves to the heads;
 115 # an empty startb with a negative steps (all the way) and a reverse (rev)
 116 # direction starts at the heads or "ending points" and moves to the
 117 # "starting points" which MAY BE DIFFERENT THAN THE "leaves"!
 118 #
 119 # Note that one step (either forward or backward) off of an empty startb
 120 # is optimized and does NOT walk the graph nor cause loop checking (unless
 121 # chklps is true).  Additionally a steps value of 1 or any negative value
 122 # with an empty startb will only output one single field on each output
 123 # line -- the head/ending point, root/leaf, or starting point.
 124 #
 125 # As a special case, since the output is identical, a negative steps (all
 126 # the way) moving forward (!rev) from an empty startb gets treated exactly
 127 # the same as a single reverse step from an empty startb and is therefore
 128 # also optimized and does not cause loop checking (unless chklps is true).
 129 #
 130 # N.B. a negative steps (all the way) moving backward (rev) from an empty
 131 # startb IS NOT OPTIMIZED because it IS NOT THE SAME (as it finds "starting
 132 # points") as moving one step forward from an empty startb (as that finds
 133 # "root/leaf" nodes) since the set of "root/leaf nodes" may differ from the
 134 # set of "starting point nodes".
 135 #
 136 #
 137 #   === stepping from nil startb ===
 138 #
 139 #               | forced |
 140 #   steps | rev | chklps | result
 141 #   ------|-----|--------|------------------------------------------------
 142 #     1   |  0  |   No   | "roots"/"leaves" of TopGit DAG
 143 #     1   |  1  |   No   | "heads"/"ending/final patches of patch series"
 144 #    -1   |  0  |   No   | "heads"/"ending/final patches of patch series"
 145 #    -1   |  1  |   Yes  | "starting/first patches of patch series"
 146 #
 147 # with all but the three special cases just mentioned (the three "No" rows
 148 # in the table above), loop detection is performed first to avoid problems
 149 # and will cause an exit status of EX_DATAERR (65) if loops are detected in
 150 # which case no output is produced; the recommended way to check for loops is
 151 # with an empty startb, steps=-1 and chklps=1 and redirecting output to
 152 # /dev/null (or capturing the heads output on success) and then testing the
 153 # exit status for an EX_DATAERR (65) result
 154 #
 155 # the effect of "navigation" is as though the heads containing startb are first
 156 # determined (by walking "forward" as far as possible) then those heads are
 157 # enumerated in postfix order (each node is only visited once though -- the
 158 # first time it's encountered in the enumeration) forming one or more linear
 159 # lists (this step is not possible if loops are present hence the loop
 160 # detection check).  Then the requested number of steps are taken from the
 161 # location startb appears in each list and the possibly none, possibly many
 162 # results are output one per line like so:
 163 #
 164 #   <result_branch_name> <containing_topgit_head_branch_name>...
 165 #
 166 # there will always be at least one containing branch name even if it's the
 167 # same as the result branch name (unless startb is empty and steps is negative
 168 # or 1), but there could be more (space separated) if the branch is part of
 169 # more than one patch series; if fldone is true then only the first field shown
 170 # above (the result branch name) will be output on each line no matter what
 171 #
 172
 173 BEGIN { exitcode = ""; stderr = "exec cat>&2"; }
 174 function exitnow(e) { exitcode=e; exit e }
 175 END { if (exitcode != "") exit exitcode }
 176
 177 BEGIN {
 178         if (steps !~ /^-?[0-9]+$/) exitnow(2)
 179         steps = 0 + steps
 180         startcnt = 0
 181         cnt = split(startb, ascratch, " ")
 182         for (i = 1; i <= cnt; ++i) {
 183                 if (!(ascratch[i] in seen)) {
 184                         starts[++startcnt] = ascratch[i]
 185                         seen[ascratch[i]] = 1
 186                 }
 187         }
 188         if (!startcnt) {
 189                 if (!steps) exitnow(2)
 190                 if (steps < 0 && !rev) {
 191                         rev = 1
 192                         steps = 1
 193                 }
 194                 if (steps < 0) fldone = 1
 195         }
 196         if (steps < 0) steps = -1
 197         inconly = 0
 198         cnt = split(inclbr, scratch, " ")
 199         if (cnt) {
 200                 inconly = 1
 201                 for (i = 1; i <= cnt; ++i) incnames[scratch[i]] = 1
 202         }
 203         cnt = split(exclbr, scratch, " ")
 204         for (i = 1; i <= cnt; ++i) excnames[scratch[i]] = 1
 205         prunecnt = split(pruneb, prunes, " ")
 206         nots = 0
 207         for (node in prunes) if (node == "^") ++nots
 208         if (nots >= prunecnt) prunecnt = 0
 209         ordidx = 0
 210 }
 211
 212 function quotevar(v) {
 213         gsub(/\047/, "\047\\\047\047", v)
 214         return "\047" v "\047"
 215 }
 216
 217 function rmfiles() {
 218         rmlist = ""
 219         if (rmbr && brfile != "") rmlist = rmlist " " quotevar(brfile)
 220         if (rman && anfile != "") rmlist = rmlist " " quotevar(anfile)
 221         if (rmlist != "") {
 222                 system("rm -f" rmlist)
 223                 rmbr = 0
 224                 brfile = ""
 225                 rman = 0
 226                 anfile = ""
 227         }
 228 }
 229 END { rmfiles() }
 230
 231 function init(abranch, _e) {
 232         if (brfile != "") {
 233                 if (tgonly) {
 234                         while ((_e = (getline abranch <brfile)) > 0) {
 235                                 if (abranch != "") tgish[abranch] = 1
 236                         }
 237                         close(brfile)
 238                         if (_e < 0) exitnow(2)
 239                 }
 240         }
 241         if (anfile != "") {
 242                 while ((_e = (getline abranch <anfile)) > 0) {
 243                         if (abranch != "") ann[abranch] = 1
 244                 }
 245                 close(anfile)
 246                 if (_e < 0) exitnow(2)
 247         }
 248         rmfiles()
 249 }
 250
 251 function incledge(b1, b2) {
 252         return !(b1 in excnames) && !(b2 in excnames) &&
 253                (!inconly || b1 in incnames || b2 in incnames)
 254 }
 255
 256 function wanted(abranch) {
 257         return (withan || !(abranch in ann)) && (!tgonly || (abranch in tgish))
 258 }
 259
 260 NR == 1 {init()}
 261
 262 function addlink(anarray, anode, alink, _linkstr) {
 263         if (alink == "") return
 264         _linkstr = anarray[anode]
 265         if (length(_linkstr) < 3) {
 266                 _linkstr = " " alink " "
 267         } else if (!index(_linkstr, " " alink " ")) {
 268                 _linkstr = _linkstr alink " "
 269         }
 270         anarray[anode] = _linkstr
 271 }
 272
 273 NF == 2 && $1 != "" && $2 != "" && incledge($1, $2) {
 274         if (!($2 in nodes)) {
 275                 nodes[$2] = 1
 276                 ordered[++ordidx] = $2
 277         }
 278         if (!($1 in nodes)) {
 279                 nodes[$1] = 1
 280                 ordered[++ordidx] = $1
 281         }
 282         if ($1 != $2 && !($1 in ann)) {
 283                 addlink(incoming, $2, $1)
 284                 addlink(outgoing, $1, $2)
 285                 edgenodes[$1] = 1
 286                 edgenodes[$2] = 1
 287         }
 288 }
 289
 290 function checkloops() {
 291         for (edge in incoming) curinc[edge] = incoming[edge]
 292         for (node in edgenodes) if (!(node in curinc)) curnodes[node] = 1
 293         for (;;) {
 294                 node = ""
 295                 for (node in curnodes) break
 296                 if (node == "") break
 297                 delete curnodes[node]
 298                 if ((node in outgoing) && split(outgoing[node], links, " ")) {
 299                         for (linki in links) {
 300                                 link = links[linki]
 301                                 if (link in curinc) {
 302                                         inclist = curinc[link]
 303                                         if ((idx = index(inclist, " " node " "))) {
 304                                                 inclist = substr(inclist, 1, idx) \
 305                                                         substr(inclist,
 306                                                                idx + length(node) + 2)
 307                                                 if (length(inclist) < 3) {
 308                                                         delete curinc[link]
 309                                                         curnodes[link] = 1
 310                                                 } else {
 311                                                         curinc[link] = inclist
 312                                                 }
 313                                         }
 314                                 }
 315                         }
 316                 }
 317                 delete edgenodes[node]
 318         }
 319         for (node in edgenodes) exitnow(65)
 320 }
 321
 322 function collectstarts(anarray, _i) {
 323         for (_i = 1; _i <= ordidx; ++_i) {
 324                 if ((ordered[_i] in nodes) && !(ordered[_i] in anarray))
 325                         starts[++startcnt] = ordered[_i]
 326         }
 327 }
 328
 329 function marknodes(anode, val, _outlinks, _oneout) {
 330         if (!(anode in nodes)) return
 331         if (nodes[anode] == val) return
 332         nodes[anode] = val
 333         if (anode in outgoing) {
 334                 split(outgoing[anode], _outlinks, " ")
 335                 for (_oneout in _outlinks) marknodes(_outlinks[_oneout], val)
 336         }
 337 }
 338
 339 function getheads_(anode, theheads, seen, headcnt, _cnt, _i, _inlinks) {
 340         if (!(anode in nodes)) return
 341         if (!(anode in incoming)) {
 342                 if (!(anode in seen)) {
 343                         seen[anode] = 1
 344                         theheads[++headcnt[0]] = anode
 345                 }
 346                 return
 347         }
 348         _cnt = split(incoming[anode], _inlinks, " ")
 349         for (_i = 1; _i <= _cnt; ++_i)
 350                 getheads_(_inlinks[_i], theheads, seen, headcnt)
 351 }
 352
 353 function getheads(anode, theheads, _seen, _headcnt) {
 354         split("", theheads, " ")
 355         split("", _seen, " ")
 356         _headcnt[0] = 0
 357         getheads_(anode, theheads, _seen, _headcnt)
 358         return _headcnt[0]
 359 }
 360
 361 function getpath_(anode, pnodes, arlinks, _seen, _pcnt, _children, _ccnt, _i) {
 362         if (anode in _seen) return
 363         _seen[anode] = 1
 364         if (anode in arlinks) {
 365                 _ccnt = split(arlinks[anode], _children, " ")
 366                 for (_i = 1; _i <= _ccnt; ++_i)
 367                         getpath_(_children[_i], pnodes, arlinks, _seen, _pcnt)
 368         }
 369         pnodes[++_pcnt[0]] = anode
 370 }
 371
 372 function getpath(anode, pnodes, arlinks, _seen, _pcnt, _z) {
 373         split("", pnodes, " ");
 374         split("", _seen, " ")
 375         _pcnt[0] = 0
 376         getpath_(anode, pnodes, arlinks, _seen, _pcnt)
 377         return _pcnt[0]
 378         printf "%s", "PATH " anode " |"
 379         for (_z = 1; _z <= _pcnt[0]; ++_z) printf " %s" pnodes[_z]
 380         printf "\n"
 381 }
 382
 383 END {
 384         if (chklps || startcnt || prunecnt || steps != 1) checkloops()
 385         if (prunecnt) {
 386                 state = 1
 387                 for (i = 1; i <= prunecnt; ++ i) {
 388                         onep = prunes[i]
 389                         if (onep == "^") {
 390                                 state = 1 - state
 391                                 continue
 392                         }
 393                         if (substr(onep, 1, 1) == "^") {
 394                                 if (state) negnodes[substr(onep, 2)] = 1
 395                                 else poznodes[substr(onep, 2)] = 1
 396                         } else {
 397                                 if (state) poznodes[onep] = 1
 398                                 else negnodes[onep] = 1
 399                         }
 400                 }
 401                 for (onep in poznodes) {
 402                         for (anode in nodes) nodes[anode] = ""
 403                         break
 404                 }
 405                 for (onep in poznodes) marknodes(onep, 1)
 406                 for (onep in negnodes) marknodes(onep, 0)
 407                 for (onep in nodes) if (nodes[onep]) tmpa[onep] = 1
 408                 split("", nodes, " ")
 409                 for (onep in tmpa) if (tmpa[onep]) nodes[onep] = 1
 410                 split("", tmpa, " ")
 411                 for (onep in outgoing) tmpa[onep] = outgoing[onep]
 412                 split("", incoming, " ")
 413                 split("", outgoing, " ")
 414                 for (onep in nodes) {
 415                         if (onep in tmpa) {
 416                                 lcnt = split(tmpa[onep], links, " ")
 417                                 for (i = 1; i <= lcnt; ++i) {
 418                                         dest = links[i]
 419                                         if (dest in nodes) {
 420                                                 addlink(incoming, dest, onep)
 421                                                 addlink(outgoing, onep, dest)
 422                                         }
 423                                 }
 424                         }
 425                 }
 426         }
 427         if (!startcnt) {
 428                 if (steps < 0 && !rev) {
 429                         print "internal error: non-optimized steps=-1 rev=0" |stderr
 430                         exitnow(70) # EX_SOFTWARE
 431                 }
 432                 if (rev)
 433                         collectstarts(incoming)
 434                 else
 435                         collectstarts(outgoing)
 436                 if (steps > 0) --steps
 437                 if (steps == 0 || !startcnt) {
 438                         for (i = 1; i <= startcnt; ++i)
 439                                 if (wanted(starts[i])) print starts[i]
 440                         exit 0
 441                 }
 442         }
 443         resultcnt = 0
 444         for (i = 1; i <= startcnt; ++i) {
 445                 headcnt = getheads(starts[i], heads)
 446                 for (j = 1; j <= headcnt; ++j) {
 447                         pathcnt = getpath(heads[j], path, outgoing)
 448                         for (pathidx = 1; pathidx <= pathcnt; ++pathidx)
 449                                 if (path[pathidx] == starts[i]) break
 450                         if (pathidx > pathcnt) continue
 451                         adjsteps = steps
 452                         if (!wanted(path[pathidx])) {
 453                                 if (!steps) continue
 454                                 if (steps > 0) --adjsteps
 455                                 incr = rev ? -1 : 1
 456                                 do pathidx += incr
 457                                 while (pathidx >= 1 && pathidx <= pathcnt &&
 458                                         !wanted(path[pathidx]))
 459                         }
 460                         if (pathidx >= 1 && pathidx <= pathcnt) {
 461                                 oldcnt = pathcnt
 462                                 newstart = path[pathidx]
 463                                 pathidx = 0
 464                                 pathcnt = 0
 465                                 for (k = 1; k <= oldcnt; ++k) {
 466                                         if (wanted(path[k])) {
 467                                                 path[++pathcnt] = path[k]
 468                                                 if (!pathidx && path[pathcnt] == newstart)
 469                                                         pathidx = pathcnt
 470                                         }
 471                                 }
 472                                 if (!pathidx) {
 473                                         print "internal error: wanted disappeared" |stderr
 474                                         exitnow(70) # EX_SOFTWARE
 475                                 }
 476                         }
 477                         if (steps < 0) {
 478                                 pathidx = rev ? 1 : pathcnt
 479                         } else {
 480                                 if (rev) pathidx -= adjsteps
 481                                 else pathidx += adjsteps
 482                         }
 483                         if (pin) {
 484                                 if (pathidx < 1) pathidx = 1
 485                                 else if (pathidx > pathcnt) pathidx = pathcnt
 486                         }
 487                         if (pathidx < 1 || pathidx > pathcnt) continue
 488                         aresult = path[pathidx]
 489                         if (aresult in seenresults) {
 490                                 resultidx = seenresults[aresult]
 491                                 resultlist = results[resultidx]
 492                         } else {
 493                                 resultidx = ++resultcnt
 494                                 resultnames[resultidx] = aresult
 495                                 seenresults[aresult] = resultidx
 496                                 resultlist = " "
 497                         }
 498                         if (!index(resultlist, " " heads[j] " ")) {
 499                                 resultlist = resultlist heads[j] " "
 500                                 results[resultidx] = resultlist
 501                         }
 502                 }
 503         }
 504         for (i = 1; i <= resultcnt; ++i) {
 505                 if (fldone) print resultnames[i]
 506                 else print resultnames[i] substr(results[i], 1, length(results[i]) - 1)
 507         }
 508 }