# A tool to parse ASTMatchers.h and update the documentation in
# ../LibASTMatchersReference.html automatically. Run from the
# directory in which this file is located to update the docs.
10 from urllib
.request
import urlopen
12 from urllib2
import urlopen
14 CLASS_INDEX_PAGE_URL
= "https://clang.llvm.org/doxygen/classes.html"
16 CLASS_INDEX_PAGE
= urlopen(CLASS_INDEX_PAGE_URL
).read().decode("utf-8")
17 except Exception as e
:
18 CLASS_INDEX_PAGE
= None
19 print("Unable to get %s: %s" % (CLASS_INDEX_PAGE_URL
, e
))
21 MATCHERS_FILE
= "../../include/clang/ASTMatchers/ASTMatchers.h"
# Each matcher is documented in one row of the form:
#   result | name | argA
# The subsequent row contains the documentation and is hidden by default,
# becoming visible via JavaScript when the user clicks the matcher name.
28 <tr><td>%(result)s</td><td class="name" onclick="toggle('%(id)s')"><a name="%(id)sAnchor">%(name)s</a></td><td>%(args)s</td></tr>
29 <tr><td colspan="4" class="doc" id="%(id)s"><pre>%(comment)s</pre></td></tr>
32 # We categorize the matchers into these three categories in the reference:
34 narrowing_matchers
= {}
35 traversal_matchers
= {}
# We output multiple rows per matcher if the matcher can be used on multiple
# node types. Thus, we need a new id per row to control the documentation
# pop-up. ids[name] keeps track of those ids.
40 ids
= collections
.defaultdict(int)
42 # Cache for doxygen urls we have already verified.
47 """Escape any html in the given text."""
48 text
= re
.sub(r
"&", "&", text
)
49 text
= re
.sub(r
"<", "<", text
)
50 text
= re
.sub(r
">", ">", text
)
52 def link_if_exists(m
):
53 """Wrap a likely AST node name in a link to its clang docs.
55 We want to do this only if the page exists, in which case it will be
56 referenced from the class index page.
59 url
= "https://clang.llvm.org/doxygen/classclang_1_1%s.html" % name
60 if url
not in doxygen_probes
:
61 search_str
= 'href="classclang_1_1%s.html"' % name
62 if CLASS_INDEX_PAGE
is not None:
63 doxygen_probes
[url
] = search_str
in CLASS_INDEX_PAGE
65 doxygen_probes
[url
] = True
66 if not doxygen_probes
[url
]:
67 print("Did not find %s in class index page" % name
)
68 if doxygen_probes
[url
]:
69 return r
'Matcher<<a href="%s">%s</a>>' % (url
, name
)
73 text
= re
.sub(r
"Matcher<([^\*&]+)>", link_if_exists
, text
)
77 def extract_result_types(comment
):
78 """Extracts a list of result types from the given comment.
80 We allow annotations in the comment of the matcher to specify what
81 nodes a matcher can match on. Those comments have the form:
82 Usable as: Any Matcher | (Matcher<T1>[, Matcher<t2>[, ...]])
84 Returns ['*'] in case of 'Any Matcher', or ['T1', 'T2', ...].
85 Returns the empty list if no 'Usable as' specification could be
89 m
= re
.search(r
"Usable as: Any Matcher[\s\n]*$", comment
, re
.S
)
93 m
= re
.match(r
"^(.*)Matcher<([^>]+)>\s*,?[\s\n]*$", comment
, re
.S
)
95 if re
.search(r
"Usable as:\s*$", comment
):
99 result_types
+= [m
.group(2)]
def strip_doxygen(comment: str) -> str:
    r"""Return the given comment without \-escaped doxygen words.

    Three passes are applied, in this order:

    1. A line consisting solely of a backslash-prefixed word is removed
       together with its trailing newline.
    2. Every remaining ``\see`` is rewritten to ``See also:``.
    3. Any other backslash-prefixed word is removed along with the
       whitespace that follows it.

    Args:
        comment: Comment text, possibly spanning several lines.

    Returns:
        The comment text with the doxygen commands stripped.
    """
    # If there is only a doxygen keyword in the line, delete the whole line.
    # re.M makes ``^`` match at the start of every line, not just the first.
    comment = re.sub(r"^\\[^\s]+\n", "", comment, flags=re.M)

    # If there is a doxygen \see command, change the \see prefix into "See also:".
    # FIXME: it would be better to turn this into a link to the target instead.
    comment = comment.replace("\\see", "See also:")

    # Delete the doxygen command and the following whitespace.
    return re.sub(r"\\[^\s]+\s+", "", comment)
def unify_arguments(args: str) -> str:
    """Get rid of anything the user doesn't care about in the argument list.

    The rewrites are applied in a fixed order, because later ones rely on
    the output of earlier ones:

    1. Drop every ``internal::`` qualifier.
    2. Collapse ``extern const <type>&`` to ``<type> ``.
    3. Replace each remaining ``&`` with a space.
    4. Rewrite a stand-alone ``M`` (optionally followed by one digit) that
       is followed by whitespace to ``Matcher<*>``.
    5. Rename ``BindableMatcher`` to ``Matcher``.
    6. Drop the ``const`` in front of ``Matcher``.

    Args:
        args: Argument list text as taken from a matcher declaration.

    Returns:
        The simplified argument list text.
    """
    # Literal substitutions need no regex engine.
    args = args.replace("internal::", "")
    args = re.sub(r"extern const\s+(.*)&", r"\1 ", args)
    args = args.replace("&", " ")
    # Only a bare M / M<digit> token delimited by whitespace (or the start of
    # the string) is rewritten; names merely starting with "M" are untouched.
    args = re.sub(r"(^|\s)M\d?(\s)", r"\1Matcher<*>\2", args)
    args = args.replace("BindableMatcher", "Matcher")
    # Must run after the BindableMatcher rename so that "const BindableMatcher"
    # is simplified as well.
    return args.replace("const Matcher", "Matcher")
def unify_type(result_type: str) -> str:
    """Get rid of anything the user doesn't care about in the type name.

    When the whole string has the form ``internal::Matcher<Name>`` or
    ``internal::BindableMatcher<Name>``, with ``Name`` a plain identifier,
    only ``Name`` is kept. Any other string is returned unchanged.

    Args:
        result_type: Result type text as taken from a matcher declaration.

    Returns:
        The simplified type name.
    """
    # Group 2 is the identifier between the angle brackets; the pattern is
    # anchored at both ends, so partial matches are never rewritten.
    return re.sub(
        r"^internal::(Bindable)?Matcher<([a-zA-Z_][a-zA-Z0-9_]*)>$",
        r"\2",
        result_type,
    )
136 def add_matcher(result_type
, name
, args
, comment
, is_dyncast
=False):
137 """Adds a matcher to one of our categories."""
139 # FIXME: Figure out whether we want to support the 'id' matcher.
141 matcher_id
= "%s%d" % (name
, ids
[name
])
143 args
= unify_arguments(args
)
144 result_type
= unify_type(result_type
)
146 docs_result_type
= esc("Matcher<%s>" % result_type
)
148 if name
== "mapAnyOf":
149 args
= "nodeMatcherFunction..."
150 docs_result_type
= "<em>unspecified</em>"
152 matcher_html
= TD_TEMPLATE
% {
153 "result": docs_result_type
,
156 "comment": esc(strip_doxygen(comment
)),
161 lookup
= result_type
+ name
162 # Use a heuristic to figure out whether a matcher is a narrowing or
163 # traversal matcher. By default, matchers that take other matchers as
164 # arguments (and are not node matchers) do traversal. We specifically
165 # exclude known narrowing matchers that also take other matchers as
167 elif "Matcher<" not in args
or name
in [
174 dict = narrowing_matchers
175 lookup
= result_type
+ name
+ esc(args
)
177 dict = traversal_matchers
178 lookup
= result_type
+ name
+ esc(args
)
180 if dict.get(lookup
) is None or len(dict.get(lookup
)) < len(matcher_html
):
181 dict[lookup
] = matcher_html
184 def act_on_decl(declaration
, comment
, allowed_types
):
185 """Parse the matcher out of the given declaration and comment.
187 If 'allowed_types' is set, it contains a list of node types the matcher
188 can match on, as extracted from the static type asserts in the matcher
191 if declaration
.strip():
193 if re
.match(r
"^\s?(#|namespace|using|template <typename NodeType> using|})", declaration
):
196 # Node matchers are defined by writing:
197 # VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name;
199 r
""".*Variadic(?:DynCast)?AllOfMatcher\s*<
202 \s*([^\s;]+)\s*;\s*$""",
207 result
, inner
, name
= m
.groups()
211 result
, name
, "Matcher<%s>..." % inner
, comment
, is_dyncast
=True
215 # Special case of type matchers:
216 # AstTypeMatcher<ArgumentType> name
218 r
""".*AstTypeMatcher\s*<
220 \s*([^\s;]+)\s*;\s*$""",
225 inner
, name
= m
.groups()
227 "Type", name
, "Matcher<%s>..." % inner
, comment
, is_dyncast
=True
229 # FIXME: re-enable once we have implemented casting on the TypeLoc
231 # add_matcher('TypeLoc', '%sLoc' % name, 'Matcher<%sLoc>...' % inner,
232 # comment, is_dyncast=True)
235 # Parse the various matcher definition macros.
237 """.*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
240 \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
246 loc
, name
, results
= m
.groups()[0:3]
247 result_types
= [r
.strip() for r
in results
.split(",")]
249 comment_result_types
= extract_result_types(comment
)
250 if comment_result_types
and sorted(result_types
) != sorted(
253 raise Exception("Inconsistent documentation for: %s" % name
)
254 for result_type
in result_types
:
255 add_matcher(result_type
, name
, "Matcher<Type>", comment
)
257 # add_matcher('%sLoc' % result_type, '%sLoc' % name, 'Matcher<TypeLoc>',
262 r
"""^\s*AST_POLYMORPHIC_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
264 \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
276 p
, n
, name
, results
= m
.groups()[0:4]
277 args
= m
.groups()[4:]
278 result_types
= [r
.strip() for r
in results
.split(",")]
279 if allowed_types
and allowed_types
!= result_types
:
280 raise Exception("Inconsistent documentation for: %s" % name
)
281 if n
not in ["", "2"]:
282 raise Exception('Cannot parse "%s"' % declaration
)
284 "%s %s" % (args
[i
], args
[i
+ 1])
285 for i
in range(0, len(args
), 2)
288 for result_type
in result_types
:
289 add_matcher(result_type
, name
, args
, comment
)
293 r
"""^\s*AST_POLYMORPHIC_MATCHER_REGEX(?:_OVERLOAD)?\(
295 \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\),
304 name
, results
, arg_name
= m
.groups()[0:3]
305 result_types
= [r
.strip() for r
in results
.split(",")]
306 if allowed_types
and allowed_types
!= result_types
:
307 raise Exception("Inconsistent documentation for: %s" % name
)
308 arg
= "StringRef %s, Regex::RegexFlags Flags = NoFlags" % arg_name
310 If the matcher is used in clang-query, RegexFlags parameter
311 should be passed as a quoted string. e.g: "NoFlags".
312 Flags can be combined with '|' example \"IgnoreCase | BasicRegex\"
314 for result_type
in result_types
:
315 add_matcher(result_type
, name
, arg
, comment
)
319 r
"""^\s*AST_MATCHER_FUNCTION(_P)?(.?)(?:_OVERLOAD)?\(
320 (?:\s*([^\s,]+)\s*,)?
332 p
, n
, result
, name
= m
.groups()[0:4]
333 args
= m
.groups()[4:]
334 if n
not in ["", "2"]:
335 raise Exception('Cannot parse "%s"' % declaration
)
337 "%s %s" % (args
[i
], args
[i
+ 1])
338 for i
in range(0, len(args
), 2)
341 add_matcher(result
, name
, args
, comment
)
345 r
"""^\s*AST_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
346 (?:\s*([^\s,]+)\s*,)?
358 p
, n
, result
, name
= m
.groups()[0:4]
359 args
= m
.groups()[4:]
361 if not allowed_types
:
362 raise Exception("Did not find allowed result types for: %s" % name
)
363 result_types
= allowed_types
365 result_types
= [result
]
366 if n
not in ["", "2"]:
367 raise Exception('Cannot parse "%s"' % declaration
)
369 "%s %s" % (args
[i
], args
[i
+ 1])
370 for i
in range(0, len(args
), 2)
373 for result_type
in result_types
:
374 add_matcher(result_type
, name
, args
, comment
)
378 r
"""^\s*AST_MATCHER_REGEX(?:_OVERLOAD)?\(
388 result
, name
, arg_name
= m
.groups()[0:3]
390 if not allowed_types
:
391 raise Exception("Did not find allowed result types for: %s" % name
)
392 result_types
= allowed_types
394 result_types
= [result
]
395 arg
= "StringRef %s, Regex::RegexFlags Flags = NoFlags" % arg_name
397 If the matcher is used in clang-query, RegexFlags parameter
398 should be passed as a quoted string. e.g: "NoFlags".
399 Flags can be combined with '|' example \"IgnoreCase | BasicRegex\"
402 for result_type
in result_types
:
403 add_matcher(result_type
, name
, arg
, comment
)
406 # Parse ArgumentAdapting matchers.
408 r
"""^.*ArgumentAdaptingMatcherFunc<.*>\s*
415 add_matcher("*", name
, "Matcher<*>", comment
)
418 # Parse Variadic functions.
420 r
"""^.*internal::VariadicFunction\s*<\s*([^,]+),\s*([^,]+),\s*[^>]+>\s*
426 result
, arg
, name
= m
.groups()[:3]
427 add_matcher(result
, name
, "%s, ..., %s" % (arg
, arg
), comment
)
431 r
"""^.*internal::VariadicFunction\s*<\s*
432 internal::PolymorphicMatcher<[\S\s]+
433 AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\),\s*(.*);$""",
439 results
, trailing
= m
.groups()
440 trailing
, name
= trailing
.rsplit(">", 1)
442 trailing
, _
= trailing
.rsplit(",", 1)
443 _
, arg
= trailing
.rsplit(",", 1)
446 result_types
= [r
.strip() for r
in results
.split(",")]
447 for result_type
in result_types
:
448 add_matcher(result_type
, name
, "%s, ..., %s" % (arg
, arg
), comment
)
451 # Parse Variadic operator matchers.
453 r
"""^.*VariadicOperatorMatcherFunc\s*<\s*([^,]+),\s*([^\s]+)\s*>\s*
459 min_args
, max_args
, name
= m
.groups()[:3]
461 add_matcher("*", name
, "Matcher<*>", comment
)
463 elif max_args
== "std::numeric_limits<unsigned>::max()":
464 add_matcher("*", name
, "Matcher<*>, ..., Matcher<*>", comment
)
468 r
"""^.*MapAnyOfMatcher<.*>\s*
475 add_matcher("*", name
, "Matcher<*>...Matcher<*>", comment
)
478 # Parse free standing matcher functions, like:
479 # Matcher<ResultType> Name(Matcher<ArgumentType> InnerMatcher) {
481 r
"""^\s*(?:template\s+<\s*(?:class|typename)\s+(.+)\s*>\s+)?
490 template_name
, result
, name
, args
= m
.groups()
492 matcherTemplateArgs
= re
.findall(
493 r
"Matcher<\s*(%s)\s*>" % template_name
, args
495 templateArgs
= re
.findall(
496 r
"(?:^|[\s,<])(%s)(?:$|[\s,>])" % template_name
, args
498 if len(matcherTemplateArgs
) < len(templateArgs
):
499 # The template name is used naked, so don't replace with `*`` later on
503 r
"(^|[\s,<])%s($|[\s,>])" % template_name
, r
"\1*\2", args
505 args
= ", ".join(p
.strip() for p
in args
.split(","))
506 m
= re
.match(r
"(?:^|.*\s+)internal::(?:Bindable)?Matcher<([^>]+)>$", result
)
508 result_types
= [m
.group(1)]
511 and len(result_types
) == 1
512 and result_types
[0] == template_name
516 result_types
= extract_result_types(comment
)
519 # Only overloads don't have their own doxygen comments; ignore those.
520 print('Ignoring "%s"' % name
)
522 print('Cannot determine result type for "%s"' % name
)
524 for result_type
in result_types
:
525 add_matcher(result_type
, name
, args
, comment
)
527 print('*** Unparsable: "' + declaration
+ '" ***')
530 def sort_table(matcher_type
, matcher_map
):
531 """Returns the sorted html table for the given row map."""
533 for key
in sorted(matcher_map
.keys()):
534 table
+= matcher_map
[key
] + "\n"
536 "<!-- START_%(type)s_MATCHERS -->\n"
538 + "<!--END_%(type)s_MATCHERS -->"
540 "type": matcher_type
,
545 # Parse the ast matchers.
546 # We alternate between two modes:
547 # body = True: We parse the definition of a matcher. We need
548 # to parse the full definition before adding a matcher, as the
549 # definition might contain static asserts that specify the result
551 # body = False: We parse the comments and declaration of the matcher.
556 for line
in open(MATCHERS_FILE
).read().splitlines():
558 if line
.strip() and line
[0] == "}":
560 act_on_decl(declaration
, comment
, allowed_types
)
566 m
= re
.search(r
"is_base_of<([^,]+), NodeType>", line
)
568 allowed_types
+= [m
.group(1)]
570 if line
.strip() and line
.lstrip()[0] == "/":
571 comment
+= re
.sub(r
"^/+\s?", "", line
) + "\n"
573 declaration
+= " " + line
576 or line
.rstrip()[-1] == ";"
577 or (line
.rstrip()[-1] == "{" and line
.rstrip()[-3:] != "= {")
579 if line
.strip() and line
.rstrip()[-1] == "{":
582 act_on_decl(declaration
, comment
, allowed_types
)
587 node_matcher_table
= sort_table("DECL", node_matchers
)
588 narrowing_matcher_table
= sort_table("NARROWING", narrowing_matchers
)
589 traversal_matcher_table
= sort_table("TRAVERSAL", traversal_matchers
)
591 reference
= open("../LibASTMatchersReference.html").read()
593 r
"<!-- START_DECL_MATCHERS.*END_DECL_MATCHERS -->",
599 r
"<!-- START_NARROWING_MATCHERS.*END_NARROWING_MATCHERS -->",
600 narrowing_matcher_table
,
605 r
"<!-- START_TRAVERSAL_MATCHERS.*END_TRAVERSAL_MATCHERS -->",
606 traversal_matcher_table
,
611 with
open("../LibASTMatchersReference.html", "w", newline
="\n") as output
:
612 output
.write(reference
)