llvm/utils/check_ninja_deps.py

   1 #!/usr/bin/env python3
   2 #
   3 # ======- check-ninja-deps - build debugging script ----*- python -*--========#
   4 #
   5 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   6 # See https://llvm.org/LICENSE.txt for license information.
   7 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   8 #
   9 # ==------------------------------------------------------------------------==#
  10
  11 """Script to find missing formal dependencies in a build.ninja file.
  12
  13 Suppose you have a header file that's autogenerated by (for example) Tablegen.
  14 If a C++ compilation step needs to include that header, then it must be
  15 executed after the Tablegen build step that generates the header. So the
  16 dependency graph in build.ninja should have the Tablegen build step as an
  17 ancestor of the C++ one. If it does not, then there's a latent build-failure
  18 bug, because depending on the order that ninja chooses to schedule its build
  19 steps, the C++ build step could run first, and fail because the header it needs
  20 does not exist yet.
  21
  22 But because that kind of bug can easily be latent or intermittent, you might
  23 not notice, if your local test build happens to succeed. What you'd like is a
  24 way to detect problems of this kind reliably, even if they _didn't_ cause a
  25 failure on your first test.
  26
  27 This script tries to do that. It's specific to the 'ninja' build tool, because
  28 ninja has useful auxiliary output modes that produce the necessary data:
  29
  30  - 'ninja -t graph' emits the full DAG of formal dependencies derived from
  31    build.ninja (in Graphviz format)
  32
  33  - 'ninja -t deps' dumps the database of dependencies discovered at build time
  34    by finding out which headers each source file actually included
  35
  36 By cross-checking these two sources of data against each other, you can find
  37 true dependencies shown by 'deps' that are not reflected as formal dependencies
  38 in 'graph', i.e. a generated header that is required by a given source file but
  39 not forced to be built first.
  40
  41 To run it:
  42
  43  - set up a build directory using ninja as the build tool (cmake -G Ninja)
  44
  45  - in that build directory, run ninja to perform an actual build (populating
  46    the dependency database)
  47
  48  - then, in the same build directory, run this script. No arguments are needed
  49    (but -C and -f are accepted, and propagated to ninja for convenience).
  50
  51 Requirements outside core Python: the 'pygraphviz' module, available via pip or
  52 as the 'python3-pygraphviz' package in Debian and Ubuntu.
  53
  54 """
  55
  56 import sys
  57 import argparse
  58 import subprocess
  59 import pygraphviz
  60
  61
  62 def toposort(g):
  63     """Topologically sort a graph.
  64
  65     The input g is a pygraphviz graph object representing a DAG. The function
  66     yields the vertices of g in an arbitrary order consistent with the edges,
  67     so that for any edge v->w, v is output before w."""
  68
  69     # Count the number of immediate predecessors *not yet output* for each
  70     # vertex. Initially this is simply their in-degrees.
  71     ideg = {v: g.in_degree(v) for v in g.nodes_iter()}
  72
  73     # Set of vertices which can be output next, which is true if they have no
  74     # immediate predecessor that has not already been output.
  75     ready = {v for v, d in ideg.items() if d == 0}
  76
  77     # Keep outputting vertices while we have any to output.
  78     while len(ready) > 0:
  79         v = next(iter(ready))
  80         yield v
  81         ready.remove(v)
  82
  83         # Having output v, find each immediate successor w, and decrement its
  84         # 'ideg' value by 1, to indicate that one more of its predecessors has
  85         # now been output.
  86         for w in g.out_neighbors(v):
  87             ideg[w] -= 1
  88             if ideg[w] == 0:
  89                 # If that counter reaches zero, w is ready to output.
  90                 ready.add(w)
  91
  92
  93 def ancestors(g, translate=lambda x: x):
  94     """Form the set of ancestors for each vertex of a graph.
  95
  96     The input g is a pygraphviz graph object representing a DAG. The function
  97     yields a sequence of pairs (vertex, set of proper ancestors).
  98
  99     The vertex names are all mapped through 'translate' before output. This
 100     allows us to produce output referring to the label rather than the
 101     identifier of every vertex.
 102     """
 103
 104     # Store the set of (translated) ancestors for each vertex so far. a[v]
 105     # includes (the translation of) v itself.
 106     a = {}
 107
 108     for v in toposort(g):
 109         vm = translate(v)
 110
 111         # Make up a[v], based on a[predecessors of v].
 112         a[v] = {vm}  # include v itself
 113         for w in g.in_neighbors(v):
 114             a[v].update(a[w])
 115
 116         # Remove v itself from the set before yielding it, so that the caller
 117         # doesn't get the trivial dependency of v on itself.
 118         yield vm, a[v].difference({vm})
 119
 120
 121 def main():
 122     parser = argparse.ArgumentParser(
 123         description="Find missing formal dependencies on generated include "
 124         "files in a build.ninja file."
 125     )
 126     parser.add_argument("-C", "--build-dir", help="Build directory (default cwd)")
 127     parser.add_argument(
 128         "-f", "--build-file", help="Build directory (default build.ninja)"
 129     )
 130     args = parser.parse_args()
 131
 132     errs = 0
 133
 134     ninja_prefix = ["ninja"]
 135     if args.build_dir is not None:
 136         ninja_prefix.extend(["-C", args.build_dir])
 137     if args.build_file is not None:
 138         ninja_prefix.extend(["-f", args.build_file])
 139
 140     # Get the formal dependency graph and decode it using pygraphviz.
 141     g = pygraphviz.AGraph(
 142         subprocess.check_output(ninja_prefix + ["-t", "graph"]).decode("UTF-8")
 143     )
 144
 145     # Helper function to ask for the label of a vertex, which is where ninja's
 146     # Graphviz output keeps the actual file name of the target.
 147     label = lambda v: g.get_node(v).attr["label"]
 148
 149     # Start by making a list of build targets, i.e. generated files. These are
 150     # just any graph vertex with at least one predecessor.
 151     targets = set(label(v) for v in g.nodes_iter() if g.in_degree(v) > 0)
 152
 153     # Find the set of ancestors of each graph vertex. We pass in 'label' as a
 154     # translation function, so that this gives us the set of ancestor _files_
 155     # for a given _file_ rather than arbitrary numeric vertex ids.
 156     deps = dict(ancestors(g, label))
 157
 158     # Fetch the cached dependency data and check it against our formal ancestry
 159     # data.
 160     currtarget = None
 161     for line in (
 162         subprocess.check_output(ninja_prefix + ["-t", "deps"])
 163         .decode("UTF-8")
 164         .splitlines()
 165     ):
 166         # ninja -t deps output consists of stanzas of the following form,
 167         # separated by a blank line:
 168         #
 169         # target: [other information we don't need]
 170         #     some_file.cpp
 171         #     some_header.h
 172         #     other_header.h
 173         #
 174         # We parse this ad-hoc by detecting the four leading spaces in a
 175         # source-file line, and the colon in a target line. 'currtarget' stores
 176         # the last target name we saw.
 177         if line.startswith("    "):
 178             dep = line[4:]
 179             assert currtarget is not None, "Source file appeared before target"
 180
 181             # We're only interested in this dependency if it's a *generated*
 182             # file, i.e. it is in our set of targets. Also, we must check that
 183             # currtarget is actually a target we know about: the dependency
 184             # cache is not cleared when build.ninja changes, so it can contain
 185             # stale data from targets that existed only in past builds in the
 186             # same directory.
 187             if dep in targets and currtarget in deps and dep not in deps[currtarget]:
 188                 print(
 189                     "error:",
 190                     currtarget,
 191                     "requires",
 192                     dep,
 193                     "but has no dependency on it",
 194                     file=sys.stderr,
 195                 )
 196                 errs += 1
 197         elif ":" in line:
 198             currtarget = line.split(":", 1)[0]
 199
 200     if errs:
 201         sys.exit("{:d} errors found".format(errs))
 202
 203
 204 if __name__ == "__main__":
 205     main()