Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / docs / tools / dump_ast_matchers.py
blobcc7024d1627b976fc5300d0cb8d8bd53e0f83ae9
1 #!/usr/bin/env python3
2 # A tool to parse ASTMatchers.h and update the documentation in
3 # ../LibASTMatchersReference.html automatically. Run from the
4 # directory in which this file is located to update the docs.
6 import collections
7 import re
9 try:
10 from urllib.request import urlopen
11 except ImportError:
12 from urllib2 import urlopen
# Doxygen class index; used only to decide whether a node type gets a link.
CLASS_INDEX_PAGE_URL = "https://clang.llvm.org/doxygen/classes.html"
try:
    # Read inside a `with` so the HTTP response is closed even when reading
    # or decoding fails.
    with urlopen(CLASS_INDEX_PAGE_URL) as response:
        CLASS_INDEX_PAGE = response.read().decode("utf-8")
except Exception as e:
    # Best effort: without the index every candidate link is assumed valid
    # (see the CLASS_INDEX_PAGE check in esc()).
    CLASS_INDEX_PAGE = None
    print("Unable to get %s: %s" % (CLASS_INDEX_PAGE_URL, e))
# Header that is parsed for matcher declarations. The path is relative to the
# current working directory (the script is meant to be run from its own
# directory).
MATCHERS_FILE = "../../include/clang/ASTMatchers/ASTMatchers.h"

# Each matcher is documented in one row of the form:
#   result | name | argA
# The subsequent row contains the documentation and is hidden by default,
# becoming visible via javascript when the user clicks the matcher name.
# The template is filled with a dict providing the keys: result, id, name,
# args and comment.
TD_TEMPLATE = """
<tr><td>%(result)s</td><td class="name" onclick="toggle('%(id)s')"><a name="%(id)sAnchor">%(name)s</a></td><td>%(args)s</td></tr>
<tr><td colspan="4" class="doc" id="%(id)s"><pre>%(comment)s</pre></td></tr>
"""

# We categorize the matchers into these three categories in the reference.
# Each dict maps a lookup key built by add_matcher() to the rendered html rows.
node_matchers = {}
narrowing_matchers = {}
traversal_matchers = {}

# We output multiple rows per matcher if the matcher can be used on multiple
# node types. Thus, we need a new id per row to control the documentation
# pop-up. ids[name] keeps track of those ids (it is the next free numeric
# suffix for that matcher name).
ids = collections.defaultdict(int)

# Cache for doxygen urls we have already verified: url -> bool (link or not).
doxygen_probes = {}
def esc(text):
    """Escape html special characters and link known AST node types.

    '&', '<' and '>' are replaced by their entities. Afterwards every
    'Matcher<Name>' occurrence (in its escaped form) is turned into a link to
    the doxygen page of clang::Name when that page is believed to exist.
    """
    escaped = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def link_if_exists(m):
        """Wrap a likely AST node name in a link to its clang docs.

        The link is only emitted when the class index page references the
        target page; if the index could not be fetched the page is assumed
        to exist. Results are cached in doxygen_probes.
        """
        name = m.group(1)
        url = "https://clang.llvm.org/doxygen/classclang_1_1%s.html" % name
        if url not in doxygen_probes:
            if CLASS_INDEX_PAGE is None:
                found = True
            else:
                found = ('href="classclang_1_1%s.html"' % name) in CLASS_INDEX_PAGE
            doxygen_probes[url] = found
            if not found:
                # Reported once per url, when it is probed for the first time.
                print("Did not find %s in class index page" % name)
        if not doxygen_probes[url]:
            return m.group(0)
        return r'Matcher&lt;<a href="%s">%s</a>&gt;' % (url, name)

    return re.sub(r"Matcher&lt;([^\*&]+)&gt;", link_if_exists, escaped)
def extract_result_types(comment):
    """Extract the list of result types announced at the end of a comment.

    Matcher comments may end with an annotation stating which nodes the
    matcher can be used on:
      Usable as: Any Matcher | (Matcher<T1>[, Matcher<T2>[, ...]])

    Returns ['*'] for 'Any Matcher'. Otherwise returns the listed types,
    collected from the end of the comment backwards (so the last listed type
    comes first). Returns None when the comment does not end with a parsable
    'Usable as:' specification.
    """
    if re.search(r"Usable as: Any Matcher[\s\n]*$", comment, re.S):
        return ["*"]

    trailing_matcher = r"^(.*)Matcher<([^>]+)>\s*,?[\s\n]*$"
    collected = []
    remaining = comment
    found = re.match(trailing_matcher, remaining, re.S)
    while found:
        # Peel off the last 'Matcher<T>' and continue with the text before it.
        collected.append(found.group(2))
        remaining = found.group(1)
        found = re.match(trailing_matcher, remaining, re.S)

    # Whatever is left must end with the 'Usable as:' marker itself.
    if re.search(r"Usable as:\s*$", remaining):
        return collected
    return None
def strip_doxygen(comment):
    r"""Return the given comment without \-escaped doxygen commands.

    Lines consisting solely of a doxygen command are dropped, '\see' becomes
    'See also:', and any other command is removed together with the
    whitespace that follows it.

    The docstring is a raw string because '\-' is not a valid escape sequence
    and would otherwise trigger a SyntaxWarning on recent Python versions.
    """
    # If there is only a doxygen keyword in the line, delete the whole line.
    comment = re.sub(r"^\\[^\s]+\n", r"", comment, flags=re.M)

    # If there is a doxygen \see command, change the \see prefix into "See also:".
    # FIXME: it would be better to turn this into a link to the target instead.
    comment = re.sub(r"\\see", r"See also:", comment)

    # Delete the doxygen command and the following whitespace.
    comment = re.sub(r"\\[^\s]+\s+", r"", comment)
    return comment
def unify_arguments(args):
    """Simplify an argument list to what matters to a reader of the docs.

    Applies a fixed sequence of regex substitutions: drops the 'internal::'
    qualifier, reference markers and 'const' in front of Matcher, and maps
    the placeholder types M/M<digit> and BindableMatcher to plain Matcher
    spellings.
    """
    # Order matters: later substitutions see the output of earlier ones.
    substitutions = (
        (r"internal::", r""),
        (r"extern const\s+(.*)&", r"\1 "),
        (r"&", r" "),
        (r"(^|\s)M\d?(\s)", r"\1Matcher<*>\2"),
        (r"BindableMatcher", r"Matcher"),
        (r"const Matcher", r"Matcher"),
    )
    for pattern, replacement in substitutions:
        args = re.sub(pattern, replacement, args)
    return args
def unify_type(result_type):
    """Reduce 'internal::[Bindable]Matcher<T>' to the bare node type 'T'.

    Any other spelling is returned unchanged.
    """
    wrapped_matcher = r"^internal::(Bindable)?Matcher<([a-zA-Z_][a-zA-Z0-9_]*)>$"
    return re.sub(wrapped_matcher, r"\2", result_type)
def add_matcher(result_type, name, args, comment, is_dyncast=False):
    """Render one matcher row and file it under the right category.

    result_type: node type the matcher is usable on (simplified via
        unify_type).
    name: matcher name; the 'id' matcher is skipped entirely.
    args: argument list as written in the declaration (simplified via
        unify_arguments).
    comment: documentation text for the matcher.
    is_dyncast: True for node matchers; otherwise the matcher is classified
        as narrowing or traversal by a heuristic on its arguments.

    When a row with the same lookup key already exists, the longer html
    snippet wins.
    """
    if name == "id":
        # FIXME: Figure out whether we want to support the 'id' matcher.
        return
    matcher_id = "%s%d" % (name, ids[name])
    ids[name] += 1
    args = unify_arguments(args)
    result_type = unify_type(result_type)

    docs_result_type = esc("Matcher<%s>" % result_type)

    if name == "mapAnyOf":
        args = "nodeMatcherFunction..."
        docs_result_type = "<em>unspecified</em>"

    matcher_html = TD_TEMPLATE % {
        "result": docs_result_type,
        "name": name,
        "args": esc(args),
        "comment": esc(strip_doxygen(comment)),
        "id": matcher_id,
    }
    # `matchers` used to be called `dict`, which shadowed the builtin.
    if is_dyncast:
        matchers = node_matchers
        lookup = result_type + name
    # Use a heuristic to figure out whether a matcher is a narrowing or
    # traversal matcher. By default, matchers that take other matchers as
    # arguments (and are not node matchers) do traversal. We specifically
    # exclude known narrowing matchers that also take other matchers as
    # arguments.
    elif "Matcher<" not in args or name in (
        "allOf",
        "anyOf",
        "anything",
        "unless",
        "mapAnyOf",
    ):
        matchers = narrowing_matchers
        lookup = result_type + name + esc(args)
    else:
        matchers = traversal_matchers
        lookup = result_type + name + esc(args)

    # Keep the most detailed rendering seen so far for this key.
    existing = matchers.get(lookup)
    if existing is None or len(existing) < len(matcher_html):
        matchers[lookup] = matcher_html
# Note appended to the documentation of every regex-based matcher.
_REGEX_FLAGS_NOTE = """
If the matcher is used in clang-query, RegexFlags parameter
should be passed as a quoted string. e.g: "NoFlags".
Flags can be combined with '|' example \"IgnoreCase | BasicRegex\"
"""


def _join_arg_pairs(args):
    """Format a flat (type, name, type, name, ...) sequence as 'type name, ...'.

    Pairs whose type is empty or None (optional regex groups that did not
    participate in the match) are skipped.
    """
    return ", ".join(
        "%s %s" % (args[i], args[i + 1]) for i in range(0, len(args), 2) if args[i]
    )


def act_on_decl(declaration, comment, allowed_types):
    """Parse the matcher out of the given declaration and comment.

    If 'allowed_types' is set, it contains a list of node types the matcher
    can match on, as extracted from the static type asserts in the matcher
    definition.

    The declaration is tried against a fixed sequence of patterns (node
    matcher declarations, the AST_*MATCHER* macros, variadic helpers and
    finally free standing functions); the first pattern that applies
    registers the matcher through add_matcher(). Blank declarations and
    preprocessor/namespace/using lines are ignored. Anything else is reported
    on stdout as unparsable.

    Raises Exception when a macro declaration is inconsistent with its
    documentation or allowed types, or uses an unsupported arity suffix.
    """
    if not declaration.strip():
        return

    if re.match(
        r"^\s?(#|namespace|using|template <typename NodeType> using|})", declaration
    ):
        return

    # Node matchers are defined by writing:
    #   VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name;
    m = re.match(
        r""".*Variadic(?:DynCast)?AllOfMatcher\s*<
                       \s*([^\s,]+)\s*(?:,
                       \s*([^\s>]+)\s*)?>
                       \s*([^\s;]+)\s*;\s*$""",
        declaration,
        flags=re.X,
    )
    if m:
        result, inner, name = m.groups()
        if not inner:
            inner = result
        add_matcher(result, name, "Matcher<%s>..." % inner, comment, is_dyncast=True)
        return

    # Special case of type matchers:
    #   AstTypeMatcher<ArgumentType> name
    m = re.match(
        r""".*AstTypeMatcher\s*<
                       \s*([^\s>]+)\s*>
                       \s*([^\s;]+)\s*;\s*$""",
        declaration,
        flags=re.X,
    )
    if m:
        inner, name = m.groups()
        add_matcher("Type", name, "Matcher<%s>..." % inner, comment, is_dyncast=True)
        # FIXME: re-enable once we have implemented casting on the TypeLoc
        # hierarchy.
        # add_matcher('TypeLoc', '%sLoc' % name, 'Matcher<%sLoc>...' % inner,
        #             comment, is_dyncast=True)
        return

    # Parse the various matcher definition macros.
    # This pattern must be a raw string: it contains '\(' and '\s', which are
    # invalid escape sequences in an ordinary string literal.
    m = re.match(
        r""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
                       \s*([^\s,]+\s*),
                       \s*(?:[^\s,]+\s*),
                       \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
                     \)\s*;\s*$""",
        declaration,
        flags=re.X,
    )
    if m:
        _loc, name, results = m.groups()[0:3]
        result_types = [r.strip() for r in results.split(",")]

        comment_result_types = extract_result_types(comment)
        if comment_result_types and sorted(result_types) != sorted(
            comment_result_types
        ):
            raise Exception("Inconsistent documentation for: %s" % name)
        for result_type in result_types:
            add_matcher(result_type, name, "Matcher<Type>", comment)
            # if loc:
            #   add_matcher('%sLoc' % result_type, '%sLoc' % name, 'Matcher<TypeLoc>',
            #               comment)
        return

    m = re.match(
        r"""^\s*AST_POLYMORPHIC_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                       \s*([^\s,]+)\s*,
                       \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
                     (?:,\s*([^\s,]+)\s*
                        ,\s*([^\s,]+)\s*)?
                     (?:,\s*([^\s,]+)\s*
                        ,\s*([^\s,]+)\s*)?
                     (?:,\s*\d+\s*)?
                    \)\s*{\s*$""",
        declaration,
        flags=re.X,
    )
    if m:
        _p, n, name, results = m.groups()[0:4]
        args = m.groups()[4:]
        result_types = [r.strip() for r in results.split(",")]
        if allowed_types and allowed_types != result_types:
            raise Exception("Inconsistent documentation for: %s" % name)
        if n not in ["", "2"]:
            raise Exception('Cannot parse "%s"' % declaration)
        args = _join_arg_pairs(args)
        for result_type in result_types:
            add_matcher(result_type, name, args, comment)
        return

    m = re.match(
        r"""^\s*AST_POLYMORPHIC_MATCHER_REGEX(?:_OVERLOAD)?\(
                       \s*([^\s,]+)\s*,
                       \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\),
                       \s*([^\s,]+)\s*
                     (?:,\s*\d+\s*)?
                    \)\s*{\s*$""",
        declaration,
        flags=re.X,
    )
    if m:
        name, results, arg_name = m.groups()[0:3]
        result_types = [r.strip() for r in results.split(",")]
        if allowed_types and allowed_types != result_types:
            raise Exception("Inconsistent documentation for: %s" % name)
        arg = "StringRef %s, Regex::RegexFlags Flags = NoFlags" % arg_name
        comment += _REGEX_FLAGS_NOTE
        for result_type in result_types:
            add_matcher(result_type, name, arg, comment)
        return

    m = re.match(
        r"""^\s*AST_MATCHER_FUNCTION(_P)?(.?)(?:_OVERLOAD)?\(
                       (?:\s*([^\s,]+)\s*,)?
                          \s*([^\s,]+)\s*
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                      \)\s*{\s*$""",
        declaration,
        flags=re.X,
    )
    if m:
        _p, n, result, name = m.groups()[0:4]
        args = m.groups()[4:]
        if n not in ["", "2"]:
            raise Exception('Cannot parse "%s"' % declaration)
        args = _join_arg_pairs(args)
        add_matcher(result, name, args, comment)
        return

    m = re.match(
        r"""^\s*AST_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                       (?:\s*([^\s,]+)\s*,)?
                          \s*([^\s,]+)\s*
                       (?:,\s*([^,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                      \)\s*{""",
        declaration,
        flags=re.X,
    )
    if m:
        _p, n, result, name = m.groups()[0:4]
        args = m.groups()[4:]
        if not result:
            # No explicit node type: fall back to the types collected from
            # the static asserts in the matcher body.
            if not allowed_types:
                raise Exception("Did not find allowed result types for: %s" % name)
            result_types = allowed_types
        else:
            result_types = [result]
        if n not in ["", "2"]:
            raise Exception('Cannot parse "%s"' % declaration)
        args = _join_arg_pairs(args)
        for result_type in result_types:
            add_matcher(result_type, name, args, comment)
        return

    m = re.match(
        r"""^\s*AST_MATCHER_REGEX(?:_OVERLOAD)?\(
                       \s*([^\s,]+)\s*,
                       \s*([^\s,]+)\s*,
                       \s*([^\s,]+)\s*
                       (?:,\s*\d+\s*)?
                      \)\s*{""",
        declaration,
        flags=re.X,
    )
    if m:
        result, name, arg_name = m.groups()[0:3]
        if not result:
            if not allowed_types:
                raise Exception("Did not find allowed result types for: %s" % name)
            result_types = allowed_types
        else:
            result_types = [result]
        arg = "StringRef %s, Regex::RegexFlags Flags = NoFlags" % arg_name
        comment += _REGEX_FLAGS_NOTE
        for result_type in result_types:
            add_matcher(result_type, name, arg, comment)
        return

    # Parse ArgumentAdapting matchers.
    m = re.match(
        r"""^.*ArgumentAdaptingMatcherFunc<.*>\s*
              ([a-zA-Z]*);$""",
        declaration,
        flags=re.X,
    )
    if m:
        name = m.groups()[0]
        add_matcher("*", name, "Matcher<*>", comment)
        return

    # Parse Variadic functions.
    m = re.match(
        r"""^.*internal::VariadicFunction\s*<\s*([^,]+),\s*([^,]+),\s*[^>]+>\s*
              ([a-zA-Z]*);$""",
        declaration,
        flags=re.X,
    )
    if m:
        result, arg, name = m.groups()[:3]
        add_matcher(result, name, "%s, ..., %s" % (arg, arg), comment)
        return

    m = re.match(
        r"""^.*internal::VariadicFunction\s*<\s*
              internal::PolymorphicMatcher<[\S\s]+
              AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\),\s*(.*);$""",
        declaration,
        flags=re.X,
    )
    if m:
        results, trailing = m.groups()
        # `trailing` looks like '<arg type>, <func>> name': peel off the
        # name, then the last template argument, to get at the arg type.
        trailing, name = trailing.rsplit(">", 1)
        name = name.strip()
        trailing, _ = trailing.rsplit(",", 1)
        _, arg = trailing.rsplit(",", 1)
        arg = arg.strip()

        result_types = [r.strip() for r in results.split(",")]
        for result_type in result_types:
            add_matcher(result_type, name, "%s, ..., %s" % (arg, arg), comment)
        return

    # Parse Variadic operator matchers.
    m = re.match(
        r"""^.*VariadicOperatorMatcherFunc\s*<\s*([^,]+),\s*([^\s]+)\s*>\s*
              ([a-zA-Z]*);$""",
        declaration,
        flags=re.X,
    )
    if m:
        _min_args, max_args, name = m.groups()[:3]
        if max_args == "1":
            add_matcher("*", name, "Matcher<*>", comment)
            return
        elif max_args == "std::numeric_limits<unsigned>::max()":
            add_matcher("*", name, "Matcher<*>, ..., Matcher<*>", comment)
            return
        # Any other arity falls through to the remaining patterns.

    m = re.match(
        r"""^.*MapAnyOfMatcher<.*>\s*
              ([a-zA-Z]*);$""",
        declaration,
        flags=re.X,
    )
    if m:
        name = m.groups()[0]
        add_matcher("*", name, "Matcher<*>...Matcher<*>", comment)
        return

    # Parse free standing matcher functions, like:
    #   Matcher<ResultType> Name(Matcher<ArgumentType> InnerMatcher) {
    m = re.match(
        r"""^\s*(?:template\s+<\s*(?:class|typename)\s+(.+)\s*>\s+)?
                     (.*)\s+
                     ([^\s\(]+)\s*\(
                     (.*)
                     \)\s*{""",
        declaration,
        re.X,
    )
    if not m:
        print('*** Unparsable: "' + declaration + '" ***')
        return

    template_name, result, name, args = m.groups()
    if template_name:
        matcherTemplateArgs = re.findall(r"Matcher<\s*(%s)\s*>" % template_name, args)
        templateArgs = re.findall(
            r"(?:^|[\s,<])(%s)(?:$|[\s,>])" % template_name, args
        )
        if len(matcherTemplateArgs) < len(templateArgs):
            # The template name is used naked, so don't replace with `*`` later on
            template_name = None
        else:
            args = re.sub(r"(^|[\s,<])%s($|[\s,>])" % template_name, r"\1*\2", args)
    args = ", ".join(p.strip() for p in args.split(","))

    result_match = re.match(
        r"(?:^|.*\s+)internal::(?:Bindable)?Matcher<([^>]+)>$", result
    )
    if result_match:
        result_types = [result_match.group(1)]
        if (
            template_name
            and len(result_types) == 1
            and result_types[0] == template_name
        ):
            result_types = ["*"]
    else:
        result_types = extract_result_types(comment)

    if not result_types:
        if not comment:
            # Only overloads don't have their own doxygen comments; ignore those.
            print('Ignoring "%s"' % name)
        else:
            print('Cannot determine result type for "%s"' % name)
    else:
        for result_type in result_types:
            add_matcher(result_type, name, args, comment)
def sort_table(matcher_type, matcher_map):
    """Render the rows of matcher_map, ordered by key, as one table section.

    The rows are wrapped in the START/END marker comments for the given
    matcher_type; each row is followed by a newline.
    """
    rows = "".join(matcher_map[key] + "\n" for key in sorted(matcher_map))
    section = (
        "<!-- START_%(type)s_MATCHERS -->\n%(table)s<!--END_%(type)s_MATCHERS -->"
    )
    return section % {"type": matcher_type, "table": rows}
# Parse the ast matchers.
# We alternate between two modes:
# body = True: We parse the definition of a matcher. We need
#   to parse the full definition before adding a matcher, as the
#   definition might contain static asserts that specify the result
#   type.
# body = False: We parse the comments and declaration of the matcher.
comment = ""
declaration = ""
allowed_types = []
body = False
# Read the header through a context manager so the file handle is closed.
with open(MATCHERS_FILE) as matchers_file:
    matcher_lines = matchers_file.read().splitlines()
for line in matcher_lines:
    if body:
        if line.startswith("}"):
            # A closing brace in column 0 ends the matcher definition.
            if declaration:
                act_on_decl(declaration, comment, allowed_types)
                comment = ""
                declaration = ""
                allowed_types = []
            body = False
        else:
            m = re.search(r"is_base_of<([^,]+), NodeType>", line)
            if m and m.group(1):
                allowed_types += [m.group(1)]
        continue
    if line.lstrip().startswith("/"):
        comment += re.sub(r"^/+\s?", "", line) + "\n"
    else:
        declaration += " " + line
        tail = line.rstrip()
        # A declaration is complete on a blank line, a trailing ';', or a
        # trailing '{' that does not open an initializer ('= {').
        if (
            not tail
            or tail.endswith(";")
            or (tail.endswith("{") and not tail.endswith("= {"))
        ):
            if tail.endswith("{"):
                body = True
            else:
                act_on_decl(declaration, comment, allowed_types)
                comment = ""
                declaration = ""
                allowed_types = []

node_matcher_table = sort_table("DECL", node_matchers)
narrowing_matcher_table = sort_table("NARROWING", narrowing_matchers)
traversal_matcher_table = sort_table("TRAVERSAL", traversal_matchers)

with open("../LibASTMatchersReference.html") as reference_file:
    reference = reference_file.read()
# The tables are inserted through callables so that re.sub treats them as
# literal text; a plain replacement string would have its backslashes
# interpreted as escapes or group references.
reference = re.sub(
    r"<!-- START_DECL_MATCHERS.*END_DECL_MATCHERS -->",
    lambda _match: node_matcher_table,
    reference,
    flags=re.S,
)
reference = re.sub(
    r"<!-- START_NARROWING_MATCHERS.*END_NARROWING_MATCHERS -->",
    lambda _match: narrowing_matcher_table,
    reference,
    flags=re.S,
)
reference = re.sub(
    r"<!-- START_TRAVERSAL_MATCHERS.*END_TRAVERSAL_MATCHERS -->",
    lambda _match: traversal_matcher_table,
    reference,
    flags=re.S,
)

with open("../LibASTMatchersReference.html", "w", newline="\n") as output:
    output.write(reference)