scripts/maint/annotate_ifdef_directives.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2017-2019, The Tor Project, Inc.
   3 # See LICENSE for licensing information
   4
   5 r"""
   6 This script iterates over a list of C files. For each file, it looks at the
   7 #if/#else C macros, and annotates them with comments explaining what they
   8 match.
   9
  10 For example, it replaces this kind of input...
  11
  12 >>> INPUT = '''
  13 ... #ifdef HAVE_OCELOT
  14 ...   C code here
  15 ... #if MIMSY == BOROGROVE
  16 ...   block 1
  17 ...   block 1
  18 ...   block 1
  19 ...   block 1
  20 ... #else
  21 ...   block 2
  22 ...   block 2
  23 ...   block 2
  24 ...   block 2
  25 ... #endif
  26 ... #endif
  27 ... '''
  28
  29 With this kind of output:
  30 >>> EXPECTED_OUTPUT = '''
  31 ... #ifdef HAVE_OCELOT
  32 ...   C code here
  33 ... #if MIMSY == BOROGROVE
  34 ...   block 1
  35 ...   block 1
  36 ...   block 1
  37 ...   block 1
  38 ... #else /* !(MIMSY == BOROGROVE) */
  39 ...   block 2
  40 ...   block 2
  41 ...   block 2
  42 ...   block 2
  43 ... #endif /* MIMSY == BOROGROVE */
  44 ... #endif /* defined(HAVE_OCELOT) */
  45 ... '''
  46
  47 Here's how to use it:
  48 >>> import sys
  49 >>> if sys.version_info.major < 3: from cStringIO import StringIO
  50 >>> if sys.version_info.major >= 3: from io import StringIO
  51
  52 >>> OUTPUT = StringIO()
  53 >>> translate(StringIO(INPUT), OUTPUT)
  54 >>> assert OUTPUT.getvalue() == EXPECTED_OUTPUT
  55
  56 Note that only #else and #endif lines are annotated.  Existing comments
  57 on those lines are removed.
  58 """
  59
  60 # Future imports for Python 2.7, mandatory in 3.0
  61 from __future__ import division
  62 from __future__ import print_function
  63 from __future__ import unicode_literals
  64
  65 import re
  66
# Any block with fewer than this many lines does not need annotations.
LINE_OBVIOUSNESS_LIMIT = 4

# Maximum line width.  This includes a terminating newline character.
#
# (This is the maximum before encoding, so that if the operating system
# uses multiple characters to encode newline, that's still okay.)
LINE_WIDTH=80
  75
  76 class Problem(Exception):
  77     pass
  78
  79 def close_parens_needed(expr):
  80     """Return the number of left-parentheses needed to make 'expr'
  81        balanced.
  82
  83     >>> close_parens_needed("1+2")
  84     0
  85     >>> close_parens_needed("(1 + 2)")
  86     0
  87     >>> close_parens_needed("(1 + 2")
  88     1
  89     >>> close_parens_needed("(1 + (2 *")
  90     2
  91     >>> close_parens_needed("(1 + (2 * 3) + (4")
  92     2
  93     """
  94     return expr.count("(") - expr.count(")")
  95
  96 def truncate_expression(expr, new_width):
  97     """Given a parenthesized C expression in 'expr', try to return a new
  98        expression that is similar to 'expr', but no more than 'new_width'
  99        characters long.
 100
 101        Try to return an expression with balanced parentheses.
 102
 103     >>> truncate_expression("1+2+3", 8)
 104     '1+2+3'
 105     >>> truncate_expression("1+2+3+4+5", 8)
 106     '1+2+3...'
 107     >>> truncate_expression("(1+2+3+4)", 8)
 108     '(1+2...)'
 109     >>> truncate_expression("(1+(2+3+4))", 8)
 110     '(1+...)'
 111     >>> truncate_expression("(((((((((", 8)
 112     '((...))'
 113     """
 114     if len(expr) <= new_width:
 115         # The expression is already short enough.
 116         return expr
 117
 118     ellipsis = "..."
 119
 120     # Start this at the minimum that we might truncate.
 121     n_to_remove = len(expr) + len(ellipsis) - new_width
 122
 123     # Try removing characters, one by one, until we get something where
 124     # re-balancing the parentheses still fits within the limit.
 125     while n_to_remove < len(expr):
 126         truncated = expr[:-n_to_remove] + ellipsis
 127         truncated += ")" * close_parens_needed(truncated)
 128         if len(truncated) <= new_width:
 129             return truncated
 130         n_to_remove += 1
 131
 132     return ellipsis
 133
 134 def commented_line(fmt, argument, maxwidth=LINE_WIDTH):
 135     # (This is a raw docstring so that our doctests can use \.)
 136     r"""
 137     Return fmt%argument, for use as a commented line.  If the line would
 138     be longer than maxwidth, truncate argument but try to keep its
 139     parentheses balanced.
 140
 141     Requires that fmt%"..." will fit into maxwidth characters.
 142
 143     Requires that fmt ends with a newline.
 144
 145     >>> commented_line("/* %s */\n", "hello world", 32)
 146     '/* hello world */\n'
 147     >>> commented_line("/* %s */\n", "hello world", 15)
 148     '/* hello... */\n'
 149     >>> commented_line("#endif /* %s */\n", "((1+2) && defined(FOO))", 32)
 150     '#endif /* ((1+2) && defi...) */\n'
 151
 152
 153     The default line limit is 80 characters including the newline:
 154
 155     >>> long_argument = "long " * 100
 156     >>> long_line = commented_line("#endif /* %s */\n", long_argument)
 157     >>> len(long_line)
 158     80
 159
 160     >>> long_line[:40]
 161     '#endif /* long long long long long long '
 162     >>> long_line[40:]
 163     'long long long long long long lon... */\n'
 164
 165     If a line works out to being 80 characters naturally, it isn't truncated,
 166     and no ellipsis is added.
 167
 168     >>> medium_argument = "a"*66
 169     >>> medium_line = commented_line("#endif /* %s */\n", medium_argument)
 170     >>> len(medium_line)
 171     80
 172     >>> "..." in medium_line
 173     False
 174     >>> medium_line[:40]
 175     '#endif /* aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
 176     >>> medium_line[40:]
 177     'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa */\n'
 178
 179
 180     """
 181     assert fmt.endswith("\n")
 182     result = fmt % argument
 183     if len(result) <= maxwidth:
 184         return result
 185     else:
 186         # How long can we let the argument be?  Try filling in the
 187         # format with an empty argument to find out.
 188         max_arg_width = maxwidth - len(fmt % "")
 189         result = fmt % truncate_expression(argument, max_arg_width)
 190         assert len(result) <= maxwidth
 191         return result
 192
 193 def negate(expr):
 194     """Return a negated version of expr; try to avoid double-negation.
 195
 196     We usually wrap expressions in parentheses and add a "!".
 197     >>> negate("A && B")
 198     '!(A && B)'
 199
 200     But if we recognize the expression as negated, we can restore it.
 201     >>> negate(negate("A && B"))
 202     'A && B'
 203
 204     The same applies for defined(FOO).
 205     >>> negate("defined(FOO)")
 206     '!defined(FOO)'
 207     >>> negate(negate("defined(FOO)"))
 208     'defined(FOO)'
 209
 210     Internal parentheses don't confuse us:
 211     >>> negate("!(FOO) && !(BAR)")
 212     '!(!(FOO) && !(BAR))'
 213
 214     """
 215     expr = expr.strip()
 216     # See whether we match !(...), with no intervening close-parens.
 217     m = re.match(r'^!\s*\(([^\)]*)\)$', expr)
 218     if m:
 219         return m.group(1)
 220
 221
 222     # See whether we match !?defined(...), with no intervening close-parens.
 223     m = re.match(r'^(!?)\s*(defined\([^\)]*\))$', expr)
 224     if m:
 225         if m.group(1) == "!":
 226             prefix = ""
 227         else:
 228             prefix = "!"
 229         return prefix + m.group(2)
 230
 231     return "!(%s)" % expr
 232
 233 def uncomment(s):
 234     """
 235     Remove existing trailing comments from an #else or #endif line.
 236     """
 237     s = re.sub(r'//.*','',s)
 238     s = re.sub(r'/\*.*','',s)
 239     return s.strip()
 240
 241 def translate(f_in, f_out):
 242     """
 243     Read a file from f_in, and write its annotated version to f_out.
 244     """
 245     # A stack listing our current if/else state.  Each member of the stack
 246     # is a list of directives.  Each directive is a 3-tuple of
 247     #    (command, rest, lineno)
 248     # where "command" is one of if/ifdef/ifndef/else/elif, and where
 249     # "rest" is an expression in a format suitable for use with #if, and where
 250     # lineno is the line number where the directive occurred.
 251     stack = []
 252     # the stack element corresponding to the top level of the file.
 253     whole_file = []
 254     cur_level = whole_file
 255     lineno = 0
 256     for line in f_in:
 257         lineno += 1
 258         m = re.match(r'\s*#\s*(if|ifdef|ifndef|else|endif|elif)\b\s*(.*)',
 259                      line)
 260         if not m:
 261             # no directive, so we can just write it out.
 262             f_out.write(line)
 263             continue
 264         command,rest = m.groups()
 265         if command in ("if", "ifdef", "ifndef"):
 266             # The #if directive pushes us one level lower on the stack.
 267             if command == 'ifdef':
 268                 rest = "defined(%s)"%uncomment(rest)
 269             elif command == 'ifndef':
 270                 rest = "!defined(%s)"%uncomment(rest)
 271             elif rest.endswith("\\"):
 272                 rest = rest[:-1]+"..."
 273
 274             rest = uncomment(rest)
 275
 276             new_level = [ (command, rest, lineno) ]
 277             stack.append(cur_level)
 278             cur_level = new_level
 279             f_out.write(line)
 280         elif command in ("else", "elif"):
 281             # We stay at the same level on the stack.  If we have an #else,
 282             # we comment it.
 283             if len(cur_level) == 0 or cur_level[-1][0] == 'else':
 284                 raise Problem("Unexpected #%s on %d"% (command,lineno))
 285             if (len(cur_level) == 1 and command == 'else' and
 286                 lineno > cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT):
 287                 f_out.write(commented_line("#else /* %s */\n",
 288                                            negate(cur_level[0][1])))
 289             else:
 290                 f_out.write(line)
 291             cur_level.append((command, rest, lineno))
 292         else:
 293             # We pop one element on the stack, and comment an endif.
 294             assert command == 'endif'
 295             if len(stack) == 0:
 296                 raise Problem("Unmatched #%s on %s"% (command,lineno))
 297             if lineno <= cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT:
 298                 f_out.write(line)
 299             elif len(cur_level) == 1 or (
 300                     len(cur_level) == 2 and cur_level[1][0] == 'else'):
 301                 f_out.write(commented_line("#endif /* %s */\n",
 302                                            cur_level[0][1]))
 303             else:
 304                 f_out.write(commented_line("#endif /* %s || ... */\n",
 305                                            cur_level[0][1]))
 306             cur_level = stack.pop()
 307     if len(stack) or cur_level != whole_file:
 308         raise Problem("Missing #endif")
 309
 310 if __name__ == '__main__':
 311
 312     import sys,os
 313
 314     if sys.argv[1] == "--self-test":
 315         import doctest
 316         doctest.testmod()
 317         sys.exit(0)
 318
 319     for fn in sys.argv[1:]:
 320         with open(fn+"_OUT", 'w') as output_file:
 321             translate(open(fn, 'r'), output_file)
 322         os.rename(fn+"_OUT", fn)