#!/usr/bin/env python

"""
Best-practices tracker for Tor source code.

Go through the various .c files and collect metrics about them. If the metrics
violate some of our best practices and they are not found in the optional
exceptions file, then log a problem about them.

We currently collect metrics on file size, function size and number of
#includes, for C source files and headers.

practracker.py should be run with the Tor top-level source directory as its
argument, like this:
  $ python3 ./scripts/maint/practracker/practracker.py .

To regenerate the exceptions file so that it allows all current
problems in the Tor source, use the --regen flag:
  $ python3 ./scripts/maint/practracker/practracker.py --regen .
"""
# Future imports for Python 2.7, mandatory in 3.0
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import codecs, os, sys
import shutil

import metrics
import util
import problem
import includes
# The filename of the exceptions file (it should be placed in the practracker directory)
EXCEPTIONS_FNAME = "./exceptions.txt"

# Recommended file size
MAX_FILE_SIZE = 3000 # lines
# Recommended function size
MAX_FUNCTION_SIZE = 100 # lines
# Recommended number of #includes
MAX_INCLUDE_COUNT = 50
# Recommended file size for headers
MAX_H_FILE_SIZE = 500
# Recommended include count for headers
MAX_H_INCLUDE_COUNT = 15
# Recommended number of dependency violations
MAX_DEP_VIOLATIONS = 0

# Map from problem type to functions that adjust for tolerance
TOLERANCE_FNS = {
    'include-count': lambda n: int(n*1.1),
    'function-size': lambda n: int(n*1.1),
    'file-size': lambda n: int(n*1.02),
    'dependency-violation': lambda n: (n+2)
}
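
# For example, with the functions above an existing 'function-size' exception
# of 100 lines is stretched to int(100 * 1.1) == 110 lines, so code that is
# only slightly over budget produces a warning rather than a hard error.
# These tolerances are applied only when running without --strict, --regen,
# --list-overbroad, or --regen-overbroad; see the set_tolerances() call in
# main().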

#######################################################

# The Tor source code topdir
TOR_TOPDIR = None

#######################################################

def open_file(fname):
    return codecs.open(fname, 'r', encoding='utf-8')

def consider_file_size(fname, f):
    """Consider the size of 'f' and yield a FileSizeItem for it.
    """
    file_size = metrics.get_file_len(f)
    yield problem.FileSizeItem(fname, file_size)

def consider_includes(fname, f):
    """Consider the #include count in 'f' and yield an IncludeCountItem
    for it.
    """
    include_count = metrics.get_include_count(f)

    yield problem.IncludeCountItem(fname, include_count)

def consider_function_size(fname, f):
    """Yield a FunctionSizeItem for every function in 'f'.
    """
    for name, lines in metrics.get_function_lines(f):
        canonical_function_name = "%s:%s()" % (fname, name)
        yield problem.FunctionSizeItem(canonical_function_name, lines)

def consider_include_violations(fname, real_fname, f):
    """Count the include-rule violations in 'f', and yield a single
    DependencyViolationItem with that count if there are any.
    """
    n = 0
    for item in includes.consider_include_rules(real_fname, f):
        n += 1
    if n:
        yield problem.DependencyViolationItem(fname, n)

#######################################################

def consider_all_metrics(files_list):
    """Consider metrics for all files, and yield a sequence of problem.Item
    objects for those issues."""
    for fname in files_list:
        with open_file(fname) as f:
            for item in consider_metrics_for_file(fname, f):
                yield item

def consider_metrics_for_file(fname, f):
    """
    Yield a sequence of problem.Item objects for all of the metrics in
    'f'.
    """
    real_fname = fname
    # Strip the useless part of the path
    if fname.startswith(TOR_TOPDIR):
        fname = fname[len(TOR_TOPDIR):]

    # Get file length
    for item in consider_file_size(fname, f):
        yield item

    # Consider number of #includes
    f.seek(0)
    for item in consider_includes(fname, f):
        yield item

    # Get function length
    f.seek(0)
    for item in consider_function_size(fname, f):
        yield item

    # Check for "upward" includes
    f.seek(0)
    for item in consider_include_violations(fname, real_fname, f):
        yield item
HEADER="""\
# Welcome to the exceptions file for Tor's best-practices tracker!

# Each line of this file represents a single violation of Tor's best
# practices -- typically, a violation that we had before practracker.py
# first existed.

# There are four kinds of problems that we recognize right now:
#   function-size -- a function of more than {MAX_FUNCTION_SIZE} lines.
#   file-size -- a .c file of more than {MAX_FILE_SIZE} lines, or a .h
#     file with more than {MAX_H_FILE_SIZE} lines.
#   include-count -- a .c file with more than {MAX_INCLUDE_COUNT} #includes,
#     or a .h file with more than {MAX_H_INCLUDE_COUNT} #includes.
#   dependency-violation -- a file includes a header that it should
#     not, according to an advisory .may_include file.

# Each line below represents a single exception that practracker should
# _ignore_. Each line has four parts:
#  1. The word "problem".
#  2. The kind of problem.
#  3. The location of the problem: either a filename, or a
#     filename:functionname pair.
#  4. The magnitude of the problem to ignore.

# So for example, consider this line:
#    problem file-size /src/core/or/connection_or.c 3200

# It tells practracker to allow the mentioned file to be up to 3200 lines
# long, even though ordinarily it would warn about any file with more than
# {MAX_FILE_SIZE} lines.

# You can either edit this file by hand, or regenerate it completely by
# running `make practracker-regen`.

# Remember: It is better to fix the problem than to add a new exception!

""".format(**globals())

def main(argv):
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--regen", action="store_true",
                        help="Regenerate the exceptions file")
    parser.add_argument("--list-overbroad", action="store_true",
                        help="List over-broad exceptions")
    parser.add_argument("--regen-overbroad", action="store_true",
                        help="Regenerate the exceptions file, "
                        "removing over-broad exceptions.")
    parser.add_argument("--exceptions",
                        help="Override the location for the exceptions file")
    parser.add_argument("--strict", action="store_true",
                        help="Make all warnings into errors")
    parser.add_argument("--terse", action="store_true",
                        help="Do not emit helpful instructions.")
    parser.add_argument("--max-h-file-size", default=MAX_H_FILE_SIZE,
                        help="Maximum lines per .h file")
    parser.add_argument("--max-h-include-count", default=MAX_H_INCLUDE_COUNT,
                        help="Maximum includes per .h file")
    parser.add_argument("--max-file-size", default=MAX_FILE_SIZE,
                        help="Maximum lines per .c file")
    parser.add_argument("--max-include-count", default=MAX_INCLUDE_COUNT,
                        help="Maximum includes per .c file")
    parser.add_argument("--max-function-size", default=MAX_FUNCTION_SIZE,
                        help="Maximum lines per function")
    parser.add_argument("--max-dependency-violations", default=MAX_DEP_VIOLATIONS,
                        help="Maximum number of dependency violations to allow")
    parser.add_argument("--include-dir", action="append",
                        default=["src"],
                        help="A directory (under topdir) to search for source")
    parser.add_argument("topdir", default=".", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    global TOR_TOPDIR
    TOR_TOPDIR = args.topdir
    if args.exceptions:
        exceptions_file = args.exceptions
    else:
        exceptions_file = os.path.join(TOR_TOPDIR, "scripts/maint/practracker", EXCEPTIONS_FNAME)
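        # (This default resolves to scripts/maint/practracker/exceptions.txt
        # under the given topdir, i.e. the file that sits next to this script.)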

    # 0) Configure our thresholds for what actually counts as a problem
    filt = problem.ProblemFilter()
    filt.addThreshold(problem.FileSizeItem("*.c", int(args.max_file_size)))
    filt.addThreshold(problem.IncludeCountItem("*.c", int(args.max_include_count)))
    filt.addThreshold(problem.FileSizeItem("*.h", int(args.max_h_file_size)))
    filt.addThreshold(problem.IncludeCountItem("*.h", int(args.max_h_include_count)))
    filt.addThreshold(problem.FunctionSizeItem("*.c", int(args.max_function_size)))
    filt.addThreshold(problem.DependencyViolationItem("*.c", int(args.max_dependency_violations)))
    filt.addThreshold(problem.DependencyViolationItem("*.h", int(args.max_dependency_violations)))
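    # For example, with the default --max-file-size of 3000, a 3200-line .c
    # file is reported as a file-size problem unless the exceptions file
    # allows it (this is the same example used in HEADER above).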

    if args.list_overbroad + args.regen + args.regen_overbroad > 1:
        print("Cannot use more than one of --regen, --list-overbroad, and "
              "--regen-overbroad.",
              file=sys.stderr)
        sys.exit(1)

    # 1) Get all the .c files we care about
    files_list = util.get_tor_c_files(TOR_TOPDIR, args.include_dir)

    # 2) Initialize problem vault and load an optional exceptions file so that
    #    we don't warn about the past
    if args.regen:
        tmpname = exceptions_file + ".tmp"
        tmpfile = open(tmpname, "w")
        problem_file = tmpfile
        problem_file.write(HEADER)
        ProblemVault = problem.ProblemVault()
    else:
        ProblemVault = problem.ProblemVault(exceptions_file)
        problem_file = sys.stdout

    if args.list_overbroad or args.regen_overbroad:
        # If we're looking for overbroad exceptions, don't list problems
        # immediately to the problem file.
        problem_file = util.NullFile()

    # 2.1) Adjust the exceptions so that we warn only about small problems,
    #      and produce errors on big ones.
    if not (args.regen or args.list_overbroad or args.regen_overbroad or
            args.strict):
        ProblemVault.set_tolerances(TOLERANCE_FNS)

    # 3) Go through all the files and report problems if they are not exceptions
    found_new_issues = 0
    for item in filt.filter(consider_all_metrics(files_list)):
        status = ProblemVault.register_problem(item)
        if status == problem.STATUS_ERR:
            print(item, file=problem_file)
            found_new_issues += 1
        elif status == problem.STATUS_WARN:
            # Warnings always go to stdout.
            print("(warning) {}".format(item))

    if args.regen:
        tmpfile.close()
        shutil.move(tmpname, exceptions_file)
        sys.exit(0)

    if args.regen_overbroad:
        tmpname = exceptions_file + ".tmp"
        tmpfile = open(tmpname, "w")
        tmpfile.write(HEADER)
        for item in ProblemVault.list_exceptions_without_overbroad():
            print(item, file=tmpfile)
        tmpfile.close()
        shutil.move(tmpname, exceptions_file)
        sys.exit(0)

    # If new issues were found, try to give out some advice to the developer
    # on how to resolve them.
    if found_new_issues and not args.regen and not args.terse:
        new_issues_str = """\
FAILURE: practracker found {} new problem(s) in the code: see warnings above.

Please fix the problems if you can, and update the exceptions file
({}) if you can't.

See doc/HACKING/HelpfulTools.md for more information on using practracker.

You can disable this message by setting the TOR_DISABLE_PRACTRACKER environment
variable.
""".format(found_new_issues, exceptions_file)
        print(new_issues_str)

    if args.list_overbroad:
        def k_fn(tup):
            return tup[0].key()
        for (ex, p) in sorted(ProblemVault.list_overbroad_exceptions(), key=k_fn):
            if p is None:
                print(ex, "->", 0)
            else:
                print(ex, "->", p.metric_value)

    sys.exit(found_new_issues)

if __name__ == '__main__':
    if os.environ.get("TOR_DISABLE_PRACTRACKER"):
        print("TOR_DISABLE_PRACTRACKER is set, skipping practracker tests.",
              file=sys.stderr)
        sys.exit(0)
    main(sys.argv)