Merge branch 'maint-0.4.8'
[tor.git] / scripts / maint / annotate_ifdef_directives.py
blobeeca0bbb17d00e5bd455f278c54814201739c645
1 #!/usr/bin/env python
2 # Copyright (c) 2017-2019, The Tor Project, Inc.
3 # See LICENSE for licensing information
5 r"""
6 This script iterates over a list of C files. For each file, it looks at the
7 #if/#else C macros, and annotates them with comments explaining what they
8 match.
10 For example, it replaces this kind of input...
12 >>> INPUT = '''
13 ... #ifdef HAVE_OCELOT
14 ... C code here
15 ... #if MIMSY == BOROGROVE
16 ... block 1
17 ... block 1
18 ... block 1
19 ... block 1
20 ... #else
21 ... block 2
22 ... block 2
23 ... block 2
24 ... block 2
25 ... #endif
26 ... #endif
27 ... '''
29 With this kind of output:
30 >>> EXPECTED_OUTPUT = '''
31 ... #ifdef HAVE_OCELOT
32 ... C code here
33 ... #if MIMSY == BOROGROVE
34 ... block 1
35 ... block 1
36 ... block 1
37 ... block 1
38 ... #else /* !(MIMSY == BOROGROVE) */
39 ... block 2
40 ... block 2
41 ... block 2
42 ... block 2
43 ... #endif /* MIMSY == BOROGROVE */
44 ... #endif /* defined(HAVE_OCELOT) */
45 ... '''
47 Here's how to use it:
48 >>> import sys
49 >>> if sys.version_info.major < 3: from cStringIO import StringIO
50 >>> if sys.version_info.major >= 3: from io import StringIO
52 >>> OUTPUT = StringIO()
53 >>> translate(StringIO(INPUT), OUTPUT)
54 >>> assert OUTPUT.getvalue() == EXPECTED_OUTPUT
56 Note that only #else and #endif lines are annotated. Existing comments
57 on those lines are removed.
58 """
60 # Future imports for Python 2.7, mandatory in 3.0
61 from __future__ import division
62 from __future__ import print_function
63 from __future__ import unicode_literals
65 import re
# Any block with fewer than this many lines does not need annotations.
LINE_OBVIOUSNESS_LIMIT = 4

# Maximum line width.  This includes a terminating newline character.
#
# (This is the maximum before encoding, so that if the operating system
# uses multiple characters to encode newline, that's still okay.)
LINE_WIDTH = 80
class Problem(Exception):
    """Raised when the #if/#else/#endif directives in the input do not
       nest correctly (unexpected #else or #elif, unmatched #endif, or a
       missing #endif at end of file).
    """
    pass
def close_parens_needed(expr):
    """Return the number of close-parentheses needed to make 'expr'
       balanced.

    The result is simply the count of "(" minus the count of ")", so it
    is negative when 'expr' has more close-parentheses than open ones.

    >>> close_parens_needed("1+2")
    0
    >>> close_parens_needed("(1 + 2)")
    0
    >>> close_parens_needed("(1 + 2")
    1
    >>> close_parens_needed("(1 + (2 *")
    2
    >>> close_parens_needed("(1 + (2 * 3) + (4")
    2
    """
    return expr.count("(") - expr.count(")")
def truncate_expression(expr, new_width):
    """Given a parenthesized C expression in 'expr', try to return a new
       expression that is similar to 'expr', but no more than 'new_width'
       characters long.

    Try to return an expression with balanced parentheses.

    If no truncated prefix of 'expr' fits once the ellipsis and the
    re-balancing close-parentheses are added, return a bare "..."
    (which is itself longer than 'new_width' when 'new_width' < 3).

    >>> truncate_expression("1+2+3", 8)
    '1+2+3'
    >>> truncate_expression("1+2+3+4+5", 8)
    '1+2+3...'
    >>> truncate_expression("(1+2+3+4)", 8)
    '(1+2...)'
    >>> truncate_expression("(1+(2+3+4))", 8)
    '(1+...)'
    >>> truncate_expression("(((((((((", 8)
    '((...))'
    """
    if len(expr) <= new_width:
        # The expression is already short enough.
        return expr

    ellipsis = "..."

    # Start this at the minimum that we might truncate.  Because
    # len(expr) > new_width here, this is always at least 1, so the
    # expr[:-n_to_remove] slice below never degenerates to expr[:-0].
    n_to_remove = len(expr) + len(ellipsis) - new_width

    # Try removing characters, one by one, until we get something where
    # re-balancing the parentheses still fits within the limit.
    while n_to_remove < len(expr):
        truncated = expr[:-n_to_remove] + ellipsis
        truncated += ")" * close_parens_needed(truncated)
        if len(truncated) <= new_width:
            return truncated
        n_to_remove += 1

    # Nothing fit: give up on keeping any of the original expression.
    return ellipsis
def commented_line(fmt, argument, maxwidth=LINE_WIDTH):
    # (This is a raw docstring so that our doctests can use \.)
    r"""
    Return fmt%argument, for use as a commented line.  If the line would
    be longer than maxwidth, truncate argument but try to keep its
    parentheses balanced.

    Requires that fmt%"..." will fit into maxwidth characters.

    Requires that fmt ends with a newline.

    >>> commented_line("/* %s */\n", "hello world", 32)
    '/* hello world */\n'
    >>> commented_line("/* %s */\n", "hello world", 15)
    '/* hello... */\n'
    >>> commented_line("#endif /* %s */\n", "((1+2) && defined(FOO))", 32)
    '#endif /* ((1+2) && defi...) */\n'


    The default line limit is 80 characters including the newline:

    >>> long_argument = "long " * 100
    >>> long_line = commented_line("#endif /* %s */\n", long_argument)
    >>> len(long_line)
    80

    >>> long_line[:40]
    '#endif /* long long long long long long '
    >>> long_line[40:]
    'long long long long long long lon... */\n'

    If a line works out to being 80 characters naturally, it isn't truncated,
    and no ellipsis is added.

    >>> medium_argument = "a"*66
    >>> medium_line = commented_line("#endif /* %s */\n", medium_argument)
    >>> len(medium_line)
    80
    >>> "..." in medium_line
    False
    >>> medium_line[:40]
    '#endif /* aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
    >>> medium_line[40:]
    'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa */\n'


    """
    assert fmt.endswith("\n")
    result = fmt % argument
    if len(result) <= maxwidth:
        return result
    else:
        # How long can we let the argument be?  Try filling in the
        # format with an empty argument to find out.
        max_arg_width = maxwidth - len(fmt % "")
        result = fmt % truncate_expression(argument, max_arg_width)
        # This fails if even fmt%"..." does not fit into maxwidth.
        assert len(result) <= maxwidth
        return result
def negate(expr):
    """Return a negated version of expr; try to avoid double-negation.

    We usually wrap expressions in parentheses and add a "!".
    >>> negate("A && B")
    '!(A && B)'

    But if we recognize the expression as negated, we can restore it.
    >>> negate(negate("A && B"))
    'A && B'

    The same applies for defined(FOO).
    >>> negate("defined(FOO)")
    '!defined(FOO)'
    >>> negate(negate("defined(FOO)"))
    'defined(FOO)'

    Internal parentheses don't confuse us:
    >>> negate("!(FOO) && !(BAR)")
    '!(!(FOO) && !(BAR))'

    """
    expr = expr.strip()
    # See whether we match !(...), with no intervening close-parens.
    # If so, the whole expression is one negated group: just unwrap it.
    m = re.match(r'^!\s*\(([^\)]*)\)$', expr)
    if m:
        return m.group(1)


    # See whether we match !?defined(...), with no intervening close-parens.
    # If so, toggle the leading "!" instead of adding another layer.
    m = re.match(r'^(!?)\s*(defined\([^\)]*\))$', expr)
    if m:
        if m.group(1) == "!":
            prefix = ""
        else:
            prefix = "!"
        return prefix + m.group(2)

    # Anything else gets wrapped so that the "!" applies to all of it.
    return "!(%s)" % expr
def uncomment(s):
    """
    Remove existing trailing comments from an #else or #endif line.

    Everything from the first "//" to the end of 's' is removed, then
    everything from the first "/*" to the end of what remains (including
    any text after a closing "*/").  The result is stripped of leading
    and trailing whitespace.

    >>> uncomment("FOO /* bar */")
    'FOO'
    >>> uncomment("FOO // bar")
    'FOO'
    """
    s = re.sub(r'//.*','',s)
    s = re.sub(r'/\*.*','',s)
    return s.strip()
def translate(f_in, f_out):
    """
    Read a file from f_in, and write its annotated version to f_out.

    Lines that are not #if/#ifdef/#ifndef/#elif/#else/#endif directives,
    and the opening #if-style directives themselves, are copied through
    unchanged.  An #else or #endif that is more than
    LINE_OBVIOUSNESS_LIMIT lines after its opening directive is replaced
    by a freshly generated line carrying a comment that describes the
    condition; the original text of that line (including any leading
    whitespace or existing comment) is not preserved.  An #else is only
    annotated when no #elif precedes it in the same block.

    Raise Problem if the directives do not nest correctly.
    """
    # A stack listing our current if/else state.  Each member of the stack
    # is a list of directives.  Each directive is a 3-tuple of
    #    (command, rest, lineno)
    # where "command" is one of if/ifdef/ifndef/else/elif, and where
    # "rest" is an expression in a format suitable for use with #if, and where
    # lineno is the line number where the directive occurred.
    stack = []
    # the stack element corresponding to the top level of the file.
    whole_file = []
    cur_level = whole_file
    lineno = 0
    for line in f_in:
        lineno += 1
        m = re.match(r'\s*#\s*(if|ifdef|ifndef|else|endif|elif)\b\s*(.*)',
                     line)
        if not m:
            # no directive, so we can just write it out.
            f_out.write(line)
            continue
        command,rest = m.groups()
        if command in ("if", "ifdef", "ifndef"):
            # The #if directive pushes us one level lower on the stack.
            if command == 'ifdef':
                rest = "defined(%s)"%uncomment(rest)
            elif command == 'ifndef':
                rest = "!defined(%s)"%uncomment(rest)
            elif rest.endswith("\\"):
                # The condition continues on the next line; we only
                # record its first line, marked with an ellipsis.
                rest = rest[:-1]+"..."

            rest = uncomment(rest)

            new_level = [ (command, rest, lineno) ]
            stack.append(cur_level)
            cur_level = new_level
            f_out.write(line)
        elif command in ("else", "elif"):
            # We stay at the same level on the stack.  If we have an #else,
            # we comment it.
            if len(cur_level) == 0 or cur_level[-1][0] == 'else':
                raise Problem("Unexpected #%s on %d"% (command,lineno))
            if (len(cur_level) == 1 and command == 'else' and
                lineno > cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT):
                f_out.write(commented_line("#else /* %s */\n",
                                           negate(cur_level[0][1])))
            else:
                f_out.write(line)
            cur_level.append((command, rest, lineno))
        else:
            # We pop one element on the stack, and comment an endif.
            assert command == 'endif'
            if len(stack) == 0:
                raise Problem("Unmatched #%s on %s"% (command,lineno))
            if lineno <= cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT:
                # Short block: the matching #if is close enough to see.
                f_out.write(line)
            elif len(cur_level) == 1 or (
                    len(cur_level) == 2 and cur_level[1][0] == 'else'):
                # Plain #if...#endif or #if...#else...#endif.
                f_out.write(commented_line("#endif /* %s */\n",
                                           cur_level[0][1]))
            else:
                # There was at least one #elif; only name the first
                # condition and hint at the rest.
                f_out.write(commented_line("#endif /* %s || ... */\n",
                                           cur_level[0][1]))
            cur_level = stack.pop()
    if len(stack) or cur_level != whole_file:
        raise Problem("Missing #endif")
if __name__ == '__main__':

    import os
    import sys

    # Compare a slice rather than indexing, so that running the script
    # with no arguments is a no-op instead of an IndexError.
    if sys.argv[1:2] == ["--self-test"]:
        import doctest
        doctest.testmod()
        sys.exit(0)

    for fn in sys.argv[1:]:
        # Write the annotated version next to the original, then rename it
        # into place.  The input is opened first so that a bad filename
        # does not leave a stray "_OUT" file behind, and both files are
        # closed before the rename.
        with open(fn, 'r') as input_file:
            with open(fn+"_OUT", 'w') as output_file:
                translate(input_file, output_file)
        os.rename(fn+"_OUT", fn)