# Copyright 2018 The Tor Project, Inc. See LICENSE file for licensing info.

"""This script looks through all the directories for files matching *.c or
   *.h, and checks their #include directives to make sure that only "permitted"
   headers are included.

   Any #include directives with angle brackets (like #include <stdio.h>) are
   ignored -- only directives with quotes (like #include "foo.h") are
   considered.

   To decide what includes are permitted, this script looks at a .may_include
   file in each directory.  This file contains empty lines, #-prefixed
   comments, filenames (like "lib/foo/bar.h") and file globs (like lib/*/*.h)
   for files that are permitted.

   The script exits with an error if any non-permitted includes are found.
   .may_include files that contain "!advisory" are considered advisory.
   Advisory .may_include files only result in warnings, rather than errors.
"""
# Future imports for Python 2.7, mandatory in 3.0
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import fnmatch
import os
import re
import sys
# Define open_file() so that text files are always read as UTF-8: on
# Python 2 open() takes no 'encoding' argument, so fall back to plain mode.
if sys.version_info[0] <= 2:
    def open_file(fname):
        """Open 'fname' for reading text (Python 2: default encoding)."""
        return open(fname, 'r')
else:
    def open_file(fname):
        """Open 'fname' for reading text as UTF-8."""
        return open(fname, 'r', encoding='utf-8')
def warn(msg):
    """Print a warning message to stderr."""
    print(msg, file=sys.stderr)
def fname_is_c(fname):
    """
    Return true if 'fname' is the name of a file that we should
    search for possibly disallowed #include directives.
    """
    if fname.endswith((".c", ".h")):
        bname = os.path.basename(fname)
        # Skip editor backups / hidden files (".foo") and emacs locks ("#foo").
        return not bname.startswith((".", "#"))
    # Explicit False rather than an implicit None for non-C/H files.
    return False
# Matches a quoted #include directive; group 1 is the included path.
# Angle-bracket includes (e.g. #include <stdio.h>) deliberately do not match.
INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')

# Name of the per-directory rules file listing permitted includes.
RULES_FNAME = ".may_include"
# Patterns that we consider "normal" to find in a .may_include file;
# anything else earns a warning from pattern_is_normal().
ALLOWED_PATTERNS = [
    re.compile(r'^.*\*\.(h|inc)$'),
    re.compile(r'^.*/.*\.h$'),
    re.compile(r'^ext/.*\.c$'),
    re.compile(r'^orconfig.h$'),
    re.compile(r'^micro-revision.i$'),
]

def pattern_is_normal(s):
    """Return true if 's' looks like one of the pattern shapes we expect
       to see in a .may_include file."""
    for p in ALLOWED_PATTERNS:
        if p.match(s):
            return True
    return False
class Error(object):
    """A single forbidden-include violation, with the location where it
       was found.  Advisory errors produce warnings rather than failures."""
    def __init__(self, location, msg, is_advisory=False):
        self.location = location
        self.msg = msg
        self.is_advisory = is_advisory

    def __str__(self):
        return "{} at {}".format(self.msg, self.location)
82 """ A 'Rules' object is the parsed version of a .may_include file. """
83 def __init__(self
, dirpath
):
84 self
.dirpath
= dirpath
85 if dirpath
.startswith("src/"):
86 self
.incpath
= dirpath
[4:]
88 self
.incpath
= dirpath
90 self
.usedPatterns
= set()
91 self
.is_advisory
= False
93 def addPattern(self
, pattern
):
94 if pattern
== "!advisory":
95 self
.is_advisory
= True
97 if not pattern_is_normal(pattern
):
98 warn("Unusual pattern {} in {}".format(pattern
, self
.dirpath
))
99 self
.patterns
.append(pattern
)
101 def includeOk(self
, path
):
102 for pattern
in self
.patterns
:
103 if fnmatch
.fnmatchcase(path
, pattern
):
104 self
.usedPatterns
.add(pattern
)
108 def applyToLines(self
, lines
, loc_prefix
=""):
112 m
= INCLUDE_PATTERN
.match(line
)
115 if not self
.includeOk(include
):
116 yield Error("{}{}".format(loc_prefix
,str(lineno
)),
117 "Forbidden include of {}".format(include
),
118 is_advisory
=self
.is_advisory
)
120 def applyToFile(self
, fname
, f
):
121 for error
in self
.applyToLines(iter(f
), "{}:".format(fname
)):
124 def noteUnusedRules(self
):
125 for p
in self
.patterns
:
126 if p
not in self
.usedPatterns
:
127 warn("Pattern {} in {} was never used.".format(p
, self
.dirpath
))
129 def getAllowedDirectories(self
):
131 for p
in self
.patterns
:
132 m
= re
.match(r
'^(.*)/\*\.(h|inc)$', p
)
134 allowed
.append(m
.group(1))
136 m
= re
.match(r
'^(.*)/[^/]*$', p
)
138 allowed
.append(m
.group(1))
def normalize_srcdir(fname):
    """given the name of a source directory or file, return its name
       relative to `src` in a unix-like format.
    """
    dirname, dirfile = os.path.split(fname)
    # If this is a .c/.h file, use its containing directory instead.
    if re.match(r'.*\.[ch]$', dirfile):
        fname = dirname

    # Now we have a directory.
    dirname, result = os.path.split(fname)
    for _ in range(100):
        # prevent excess looping in case I missed a tricky case
        dirname, dirpart = os.path.split(dirname)
        if dirpart == 'src' or dirname == "":
            return result
        result = "{}/{}".format(dirpart, result)

    print("No progress!")
    # A bare 'assert False' would be stripped under -O; raise explicitly.
    raise AssertionError("No progress normalizing {!r}".format(fname))
# Cache mapping rules-file path -> Rules object (None if the file is absent).
include_rules_cache = {}

def load_include_rules(fname):
    """ Read a rules file from 'fname', and return it as a Rules object.
        Return 'None' if fname does not exist.  Results are cached in
        include_rules_cache so each file is parsed only once."""
    if fname in include_rules_cache:
        return include_rules_cache[fname]
    if not os.path.exists(fname):
        include_rules_cache[fname] = None
        return None
    result = Rules(os.path.split(fname)[0])
    with open_file(fname) as f:
        for line in f:
            line = line.strip()
            # Skip comments and blank lines.
            if line.startswith("#") or not line:
                continue
            result.addPattern(line)
    include_rules_cache[fname] = result
    return result
def get_all_include_rules():
    """Return a list of all the Rules objects we have loaded so far,
       sorted by their directory names."""
    result = []
    for _fname, rules in sorted(include_rules_cache.items()):
        if rules is not None:
            result.append(rules)
    return result
def remove_self_edges(graph):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).

       Remove all edges from a node to itself."""
    for node in list(graph):
        neighbors = graph[node]
        graph[node] = [dest for dest in neighbors if dest != node]
204 """Takes a directed graph in as an adjacency mapping (a mapping from
205 node to a list of the nodes to which it connects), and completes
210 for k
in graph
.keys():
211 graph
[k
] = set(graph
[k
])
213 for k
in graph
.keys():
215 for v
in list(graph
[k
]):
216 graph
[k
].update(graph
.get(v
, []))
217 if sz
!= len(graph
[k
]):
def toposort(graph, limit=100):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).  Tries to
       perform a topological sort on the graph, arranging the nodes into
       "levels", such that every member of each level is only reachable
       by members of later levels.

       Returns a list of the members of each level.

       Modifies the input graph, removing every member that could be
       sorted.  If the graph does not become empty, then it contains a
       cycle.

       "limit" is the max depth of the graph after which we give up trying
       to sort it and conclude we have a cycle.
    """
    all_levels = []

    depth = 0
    while graph:
        cur_level = []
        all_levels.append(cur_level)
        for k in list(graph):
            # Drop edges to already-sorted (removed) nodes.
            graph[k] = [d for d in graph[k] if d in graph]
            if graph[k] == []:
                cur_level.append(k)
        for k in cur_level:
            del graph[k]
        depth += 1
        if depth > limit:
            # Probably a cycle: give up rather than loop forever.
            break

    return all_levels
def consider_include_rules(fname, f):
    """Yield include-rule violations found in the open file 'f' (named
       'fname'), according to the .may_include rules for its directory.
       Yields nothing if the directory has no rules file."""
    dirpath = os.path.split(fname)[0]
    rules_fname = os.path.join(dirpath, RULES_FNAME)
    # Reuse the already-computed path instead of joining it twice.
    rules = load_include_rules(rules_fname)
    if rules is None:
        return

    for err in rules.applyToFile(fname, f):
        yield err
# Module-level default for the --toposort option; run_check_includes()
# takes log_sorted_levels as a parameter.
log_sorted_levels = False
def walk_c_files(topdir="src"):
    """Run through all .c and .h files under topdir, looking for
       include-rule violations. Yield those violations."""

    for dirpath, dirnames, fnames in os.walk(topdir):
        for fname in fnames:
            if fname_is_c(fname):
                fullpath = os.path.join(dirpath, fname)
                with open(fullpath) as f:
                    for err in consider_include_rules(fullpath, f):
                        yield err
def open_or_stdin(fname):
    """Open 'fname' for reading, treating "-" as standard input."""
    if fname == '-':
        return sys.stdin
    else:
        return open(fname)
def check_subsys_file(fname, uses_dirs):
    """Check the subsystem-order list in 'fname' against the dependency
       mapping 'uses_dirs' (directory -> directories it may include).
       Print any ordering inversions found; return True on problems."""
    if not uses_dirs:
        # We're doing a distcheck build, or for some other reason there are
        # no .may_include files.
        print("SKIPPING")
        return False

    # Normalize both sides of the mapping to src-relative directory names.
    uses_dirs = { normalize_srcdir(k) : { normalize_srcdir(d) for d in v }
                  for (k,v) in uses_dirs.items() }
    uses_closure = closure(uses_dirs)
    ok = True
    previous_subsystems = []

    with open_or_stdin(fname) as f:
        for line in f:
            _, name, fname = line.split()
            fname = normalize_srcdir(fname)
            # A subsystem must not be used by any listed before it.
            for prev in previous_subsystems:
                if fname in uses_closure[prev]:
                    print("INVERSION: {} uses {}".format(prev, fname))
                    ok = False
            previous_subsystems.append(fname)

    return not ok
def run_check_includes(topdir, list_unused=False, log_sorted_levels=False,
                       list_advisories=False, check_subsystem_order=None):
    """Walk 'topdir' for include-rule violations and report them on stderr.
       Exits the process with status 1 on non-advisory violations, on a
       subsystem-ordering problem, or on circular .may_include rules."""
    trouble = False

    for err in walk_c_files(topdir):
        if err.is_advisory and not list_advisories:
            continue
        print(err, file=sys.stderr)
        if not err.is_advisory:
            trouble = True

    if trouble:
        warn(
    """To change which includes are allowed in a C file, edit the {}
    files in its enclosing directory.""".format(RULES_FNAME))
        sys.exit(1)

    if list_unused:
        for rules in get_all_include_rules():
            rules.noteUnusedRules()

    # Build the directory-level dependency graph from the loaded rules.
    uses_dirs = {}
    for rules in get_all_include_rules():
        uses_dirs[rules.incpath] = rules.getAllowedDirectories()

    remove_self_edges(uses_dirs)

    if check_subsystem_order:
        if check_subsys_file(check_subsystem_order, uses_dirs):
            sys.exit(1)

    # toposort() consumes uses_dirs; anything left over is part of a cycle.
    all_levels = toposort(uses_dirs)

    if log_sorted_levels:
        for (n, cur_level) in enumerate(all_levels):
            if cur_level:
                print(n, cur_level)

    if uses_dirs:
        print("There are circular .may_include dependencies in here somewhere:",
              uses_dirs)
        sys.exit(1)
def main(argv):
    """Command-line entry point: parse options and run the include checks."""
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--toposort", action="store_true",
                        help="Print a topologically sorted list of modules")
    parser.add_argument("--list-unused", action="store_true",
                        help="List unused lines in .may_include files.")
    parser.add_argument("--list-advisories", action="store_true",
                        help="List advisories as well as forbidden includes")
    parser.add_argument("--check-subsystem-order", action="store",
                        help="Check a list of subsystems for ordering")
    parser.add_argument("topdir", default="src", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    run_check_includes(topdir=args.topdir,
                       log_sorted_levels=args.toposort,
                       list_unused=args.list_unused,
                       list_advisories=args.list_advisories,
                       check_subsystem_order=args.check_subsystem_order)
if __name__ == '__main__':
    # Script entry point: run against the real command line.
    main(sys.argv)