# Copyright 2018 The Tor Project, Inc. See LICENSE file for licensing info.

"""This script looks through all the directories for files matching *.c or
   *.h, and checks their #include directives to make sure that only "permitted"
   headers are included.

   Any #include directives with angle brackets (like #include <stdio.h>) are
   ignored -- only directives with quotes (like #include "foo.h") are
   considered.

   To decide what includes are permitted, this script looks at a .may_include
   file in each directory.  This file contains empty lines, #-prefixed
   comments, filenames (like "lib/foo/bar.h") and file globs (like lib/*/*.h)
   for files that are permitted.

   The script exits with an error if any non-permitted includes are found.
   .may_include files that contain "!advisory" are considered advisory.
   Advisory .may_include files only result in warnings, rather than errors.
"""
# Future imports for Python 2.7, mandatory in 3.0
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import fnmatch
import os
import re
import sys
# Define open_file() so that text files are always read as UTF-8: on
# Python 2 open() takes no 'encoding' argument, so fall back to plain mode.
if sys.version_info[0] <= 2:
    def open_file(fname):
        """Open 'fname' for reading text (Python 2: default encoding)."""
        return open(fname, 'r')
else:
    def open_file(fname):
        """Open 'fname' for reading text as UTF-8."""
        return open(fname, 'r', encoding='utf-8')
def warn(msg):
    """Print a warning message to stderr."""
    print(msg, file=sys.stderr)
def fname_is_c(fname):
    """
    Return true if 'fname' is the name of a file that we should
    search for possibly disallowed #include directives.
    """
    if fname.endswith((".c", ".h")):
        bname = os.path.basename(fname)
        # Skip editor backups / hidden files (".foo") and emacs locks ("#foo").
        return not bname.startswith((".", "#"))
    # Explicit False rather than an implicit None for non-C/H files.
    return False
# Matches a quoted #include directive; group 1 is the included path.
# Angle-bracket includes (e.g. #include <stdio.h>) deliberately do not match.
INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')

# Name of the per-directory rules file listing permitted includes.
RULES_FNAME = ".may_include"
# Patterns that we consider "normal" to find in a .may_include file;
# anything else earns a warning from pattern_is_normal().
ALLOWED_PATTERNS = [
    re.compile(r'^.*\*\.(h|inc)$'),
    re.compile(r'^.*/.*\.h$'),
    re.compile(r'^ext/.*\.c$'),
    re.compile(r'^orconfig.h$'),
    re.compile(r'^micro-revision.i$'),
]

def pattern_is_normal(s):
    """Return true if 's' looks like one of the pattern shapes we expect
       to see in a .may_include file."""
    for p in ALLOWED_PATTERNS:
        if p.match(s):
            return True
    return False
class Error(object):
    """A single forbidden-include violation, with the location where it
       was found.  Advisory errors produce warnings rather than failures."""
    def __init__(self, location, msg, is_advisory=False):
        self.location = location
        self.msg = msg
        self.is_advisory = is_advisory

    def __str__(self):
        return "{} at {}".format(self.msg, self.location)
82 """ A 'Rules' object is the parsed version of a .may_include file. """
83 def __init__(self
, dirpath
):
84 self
.dirpath
= dirpath
85 if dirpath
.startswith("src/"):
86 self
.incpath
= dirpath
[4:]
88 self
.incpath
= dirpath
90 self
.usedPatterns
= set()
91 self
.is_advisory
= False
93 def addPattern(self
, pattern
):
94 if pattern
== "!advisory":
95 self
.is_advisory
= True
97 if not pattern_is_normal(pattern
):
98 warn("Unusual pattern {} in {}".format(pattern
, self
.dirpath
))
99 self
.patterns
.append(pattern
)
101 def includeOk(self
, path
):
102 for pattern
in self
.patterns
:
103 if fnmatch
.fnmatchcase(path
, pattern
):
104 self
.usedPatterns
.add(pattern
)
108 def applyToLines(self
, lines
, loc_prefix
=""):
112 m
= INCLUDE_PATTERN
.match(line
)
115 if not self
.includeOk(include
):
116 yield Error("{}{}".format(loc_prefix
,str(lineno
)),
117 "Forbidden include of {}".format(include
),
118 is_advisory
=self
.is_advisory
)
120 def applyToFile(self
, fname
, f
):
121 for error
in self
.applyToLines(iter(f
), "{}:".format(fname
)):
124 def noteUnusedRules(self
):
125 for p
in self
.patterns
:
126 if p
not in self
.usedPatterns
:
127 warn("Pattern {} in {} was never used.".format(p
, self
.dirpath
))
129 def getAllowedDirectories(self
):
131 for p
in self
.patterns
:
132 m
= re
.match(r
'^(.*)/\*\.(h|inc)$', p
)
134 allowed
.append(m
.group(1))
136 m
= re
.match(r
'^(.*)/[^/]*$', p
)
138 allowed
.append(m
.group(1))
def normalize_srcdir(fname):
    """given the name of a source directory or file, return its name
       relative to `src` in a unix-like format.
    """
    dirname, dirfile = os.path.split(fname)
    # If this is a .c/.h file, use its containing directory instead.
    if re.match(r'.*\.[ch]$', dirfile):
        fname = dirname

    # Now we have a directory.
    dirname, result = os.path.split(fname)
    for _ in range(100):
        # prevent excess looping in case I missed a tricky case
        dirname, dirpart = os.path.split(dirname)
        if dirpart == 'src' or dirname == "":
            return result
        result = "{}/{}".format(dirpart, result)

    print("No progress!")
    # A bare 'assert False' would be stripped under -O; raise explicitly.
    raise AssertionError("No progress normalizing {!r}".format(fname))
# Cache mapping rules-file path -> Rules object (None if the file is absent).
include_rules_cache = {}

def load_include_rules(fname):
    """ Read a rules file from 'fname', and return it as a Rules object.
        Return 'None' if fname does not exist.  Results are cached in
        include_rules_cache so each file is parsed only once."""
    if fname in include_rules_cache:
        return include_rules_cache[fname]
    if not os.path.exists(fname):
        include_rules_cache[fname] = None
        return None
    result = Rules(os.path.split(fname)[0])
    with open_file(fname) as f:
        for line in f:
            line = line.strip()
            # Skip comments and blank lines.
            if line.startswith("#") or not line:
                continue
            result.addPattern(line)
    include_rules_cache[fname] = result
    return result
def get_all_include_rules():
    """Return a list of all the Rules objects we have loaded so far,
       sorted by their directory names."""
    result = []
    for _fname, rules in sorted(include_rules_cache.items()):
        if rules is not None:
            result.append(rules)
    return result
def remove_self_edges(graph):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).

       Remove all edges from a node to itself."""
    for node in list(graph):
        neighbors = graph[node]
        graph[node] = [dest for dest in neighbors if dest != node]
204 """Takes a directed graph in as an adjacency mapping (a mapping from
205 node to a list of the nodes to which it connects), and completes
210 for k
in graph
.keys():
211 graph
[k
] = set(graph
[k
])
213 for k
in graph
.keys():
215 for v
in list(graph
[k
]):
216 graph
[k
].update(graph
.get(v
, []))
217 if sz
!= len(graph
[k
]):
def toposort(graph, limit=100):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).  Tries to
       perform a topological sort on the graph, arranging the nodes into
       "levels", such that every member of each level is only reachable
       by members of later levels.

       Returns a list of the members of each level.

       Modifies the input graph, removing every member that could be
       sorted.  If the graph does not become empty, then it contains a
       cycle.

       "limit" is the max depth of the graph after which we give up trying
       to sort it and conclude we have a cycle.
    """
    all_levels = []

    depth = 0
    while graph:
        cur_level = []
        all_levels.append(cur_level)
        for k in list(graph):
            # Drop edges to already-sorted (removed) nodes.
            graph[k] = [d for d in graph[k] if d in graph]
            if graph[k] == []:
                cur_level.append(k)
        for k in cur_level:
            del graph[k]
        depth += 1
        if depth > limit:
            # Probably a cycle: give up rather than loop forever.
            break

    return all_levels
def consider_include_rules(fname, f):
    """Yield include-rule violations found in the open file 'f' (named
       'fname'), according to the .may_include rules for its directory.
       Yields nothing if the directory has no rules file."""
    dirpath = os.path.split(fname)[0]
    rules_fname = os.path.join(dirpath, RULES_FNAME)
    # Reuse the already-computed path instead of joining it twice.
    rules = load_include_rules(rules_fname)
    if rules is None:
        return

    for err in rules.applyToFile(fname, f):
        yield err
# Module-level default for the --toposort option; run_check_includes()
# takes log_sorted_levels as a parameter.
log_sorted_levels = False
def walk_c_files(topdir="src"):
    """Run through all .c and .h files under topdir, looking for
       include-rule violations. Yield those violations."""

    for dirpath, dirnames, fnames in os.walk(topdir):
        for fname in fnames:
            if fname_is_c(fname):
                fullpath = os.path.join(dirpath, fname)
                with open(fullpath) as f:
                    for err in consider_include_rules(fullpath, f):
                        yield err
def open_or_stdin(fname):
    """Open 'fname' for reading, treating "-" as standard input."""
    if fname == '-':
        return sys.stdin
    else:
        return open(fname)
def check_subsys_file(fname, uses_dirs):
    """Check the subsystem-order list in 'fname' against the dependency
       mapping 'uses_dirs' (directory -> directories it may include).
       Print any ordering inversions found; return True on problems."""
    if not uses_dirs:
        # We're doing a distcheck build, or for some other reason there are
        # no .may_include files.
        print("SKIPPING")
        return False

    # Normalize both sides of the mapping to src-relative directory names.
    uses_dirs = { normalize_srcdir(k) : { normalize_srcdir(d) for d in v }
                  for (k,v) in uses_dirs.items() }
    uses_closure = closure(uses_dirs)
    ok = True
    previous_subsystems = []

    with open_or_stdin(fname) as f:
        for line in f:
            _, name, fname = line.split()
            fname = normalize_srcdir(fname)
            # A subsystem must not be used by any listed before it.
            for prev in previous_subsystems:
                if fname in uses_closure[prev]:
                    print("INVERSION: {} uses {}".format(prev, fname))
                    ok = False
            previous_subsystems.append(fname)

    return not ok
def run_check_includes(topdir, list_unused=False, log_sorted_levels=False,
                       list_advisories=False, check_subsystem_order=None):
    """Walk 'topdir' for include-rule violations and report them on stderr.
       Exits the process with status 1 on non-advisory violations, on a
       subsystem-ordering problem, or on circular .may_include rules."""
    trouble = False

    for err in walk_c_files(topdir):
        if err.is_advisory and not list_advisories:
            continue
        print(err, file=sys.stderr)
        if not err.is_advisory:
            trouble = True

    if trouble:
        warn(
    """To change which includes are allowed in a C file, edit the {}
    files in its enclosing directory.""".format(RULES_FNAME))
        sys.exit(1)

    if list_unused:
        for rules in get_all_include_rules():
            rules.noteUnusedRules()

    # Build the directory-level dependency graph from the loaded rules.
    uses_dirs = {}
    for rules in get_all_include_rules():
        uses_dirs[rules.incpath] = rules.getAllowedDirectories()

    remove_self_edges(uses_dirs)

    if check_subsystem_order:
        if check_subsys_file(check_subsystem_order, uses_dirs):
            sys.exit(1)

    # toposort() consumes uses_dirs; anything left over is part of a cycle.
    all_levels = toposort(uses_dirs)

    if log_sorted_levels:
        for (n, cur_level) in enumerate(all_levels):
            if cur_level:
                print(n, cur_level)

    if uses_dirs:
        print("There are circular .may_include dependencies in here somewhere:",
              uses_dirs)
        sys.exit(1)
def main(argv):
    """Command-line entry point: parse options and run the include checks."""
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--toposort", action="store_true",
                        help="Print a topologically sorted list of modules")
    parser.add_argument("--list-unused", action="store_true",
                        help="List unused lines in .may_include files.")
    parser.add_argument("--list-advisories", action="store_true",
                        help="List advisories as well as forbidden includes")
    parser.add_argument("--check-subsystem-order", action="store",
                        help="Check a list of subsystems for ordering")
    parser.add_argument("topdir", default="src", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    run_check_includes(topdir=args.topdir,
                       log_sorted_levels=args.toposort,
                       list_unused=args.list_unused,
                       list_advisories=args.list_advisories,
                       check_subsystem_order=args.check_subsystem_order)
if __name__ == '__main__':
    # Script entry point: run against the real command line.
    main(sys.argv)