1 //===--- DumpAST.cpp - Serialize clang AST to LSP -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "SourceCode.h"
12 #include "support/Logger.h"
13 #include "clang/AST/ASTTypeTraits.h"
14 #include "clang/AST/Expr.h"
15 #include "clang/AST/ExprCXX.h"
16 #include "clang/AST/NestedNameSpecifier.h"
17 #include "clang/AST/PrettyPrinter.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/AST/TextNodeDumper.h"
20 #include "clang/AST/Type.h"
21 #include "clang/AST/TypeLoc.h"
22 #include "clang/Basic/Specifiers.h"
23 #include "clang/Tooling/Syntax/Tokens.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/raw_ostream.h"
31 using llvm::raw_ostream
;
32 template <typename Print
> std::string
toString(const Print
&C
) {
34 llvm::raw_string_ostream
OS(Result
);
36 return std::move(OS
.str());
39 bool isInjectedClassName(Decl
*D
) {
40 if (const auto *CRD
= llvm::dyn_cast
<CXXRecordDecl
>(D
))
41 return CRD
->isInjectedClassName();
45 class DumpVisitor
: public RecursiveASTVisitor
<DumpVisitor
> {
46 using Base
= RecursiveASTVisitor
<DumpVisitor
>;
48 const syntax::TokenBuffer
&Tokens
;
49 const ASTContext
&Ctx
;
51 // Pointers are into 'children' vector.
52 // They remain valid because while a node is on the stack we only add
53 // descendants, not siblings.
54 std::vector
<ASTNode
*> Stack
;
56 // Generic logic used to handle traversal of all node kinds.
59 bool traverseNodePre(llvm::StringRef Role
, const T
&Node
) {
61 assert(Root
.role
.empty());
62 Stack
.push_back(&Root
);
64 Stack
.back()->children
.emplace_back();
65 Stack
.push_back(&Stack
.back()->children
.back());
67 auto &N
= *Stack
.back();
69 N
.kind
= getKind(Node
);
70 N
.detail
= getDetail(Node
);
71 N
.range
= getRange(Node
);
72 N
.arcana
= getArcana(Node
);
75 bool traverseNodePost() {
76 assert(!Stack
.empty());
80 template <typename T
, typename Callable
>
81 bool traverseNode(llvm::StringRef Role
, const T
&Node
, const Callable
&Body
) {
82 traverseNodePre(Role
, Node
);
84 return traverseNodePost();
87 // Range: most nodes have getSourceRange(), with a couple of exceptions.
88 // We only return it if it's valid at both ends and there are no macros.
90 template <typename T
> llvm::Optional
<Range
> getRange(const T
&Node
) {
91 SourceRange SR
= getSourceRange(Node
);
92 auto Spelled
= Tokens
.spelledForExpanded(Tokens
.expandedTokens(SR
));
95 return halfOpenToRange(
96 Tokens
.sourceManager(),
97 CharSourceRange::getCharRange(Spelled
->front().location(),
98 Spelled
->back().endLocation()));
100 template <typename T
, typename
= decltype(std::declval
<T
>().getSourceRange())>
101 SourceRange
getSourceRange(const T
&Node
) {
102 return Node
.getSourceRange();
104 template <typename T
,
105 typename
= decltype(std::declval
<T
*>()->getSourceRange())>
106 SourceRange
getSourceRange(const T
*Node
) {
107 return Node
->getSourceRange();
109 // TemplateName doesn't have a real Loc node type.
110 SourceRange
getSourceRange(const TemplateName
&Node
) { return SourceRange(); }
111 // Attr just uses a weird method name. Maybe we should fix it instead?
112 SourceRange
getSourceRange(const Attr
*Node
) { return Node
->getRange(); }
114 // Kind is usually the class name, without the suffix ("Type" etc).
115 // Where there's a set of variants instead, we use the 'Kind' enum values.
117 std::string
getKind(const Decl
*D
) { return D
->getDeclKindName(); }
118 std::string
getKind(const Stmt
*S
) {
119 std::string Result
= S
->getStmtClassName();
120 if (llvm::StringRef(Result
).endswith("Stmt") ||
121 llvm::StringRef(Result
).endswith("Expr"))
122 Result
.resize(Result
.size() - 4);
125 std::string
getKind(const TypeLoc
&TL
) {
127 if (TL
.getTypeLocClass() == TypeLoc::Qualified
)
129 return TL
.getType()->getTypeClassName();
131 std::string
getKind(const TemplateArgumentLoc
&TAL
) {
132 switch (TAL
.getArgument().getKind()) {
133 #define TEMPLATE_ARGUMENT_KIND(X) \
134 case TemplateArgument::X: \
136 TEMPLATE_ARGUMENT_KIND(Null
);
137 TEMPLATE_ARGUMENT_KIND(NullPtr
);
138 TEMPLATE_ARGUMENT_KIND(Expression
);
139 TEMPLATE_ARGUMENT_KIND(Integral
);
140 TEMPLATE_ARGUMENT_KIND(Pack
);
141 TEMPLATE_ARGUMENT_KIND(Type
);
142 TEMPLATE_ARGUMENT_KIND(Declaration
);
143 TEMPLATE_ARGUMENT_KIND(Template
);
144 TEMPLATE_ARGUMENT_KIND(TemplateExpansion
);
145 #undef TEMPLATE_ARGUMENT_KIND
147 llvm_unreachable("Unhandled ArgKind enum");
149 std::string
getKind(const NestedNameSpecifierLoc
&NNSL
) {
150 assert(NNSL
.getNestedNameSpecifier());
151 switch (NNSL
.getNestedNameSpecifier()->getKind()) {
152 #define NNS_KIND(X) \
153 case NestedNameSpecifier::X: \
155 NNS_KIND(Identifier
);
158 NNS_KIND(TypeSpecWithTemplate
);
161 NNS_KIND(NamespaceAlias
);
164 llvm_unreachable("Unhandled SpecifierKind enum");
166 std::string
getKind(const CXXCtorInitializer
*CCI
) {
167 if (CCI
->isBaseInitializer())
168 return "BaseInitializer";
169 if (CCI
->isDelegatingInitializer())
170 return "DelegatingInitializer";
171 if (CCI
->isAnyMemberInitializer())
172 return "MemberInitializer";
173 llvm_unreachable("Unhandled CXXCtorInitializer type");
175 std::string
getKind(const TemplateName
&TN
) {
176 switch (TN
.getKind()) {
177 #define TEMPLATE_KIND(X) \
178 case TemplateName::X: \
180 TEMPLATE_KIND(Template
);
181 TEMPLATE_KIND(OverloadedTemplate
);
182 TEMPLATE_KIND(AssumedTemplate
);
183 TEMPLATE_KIND(QualifiedTemplate
);
184 TEMPLATE_KIND(DependentTemplate
);
185 TEMPLATE_KIND(SubstTemplateTemplateParm
);
186 TEMPLATE_KIND(SubstTemplateTemplateParmPack
);
187 TEMPLATE_KIND(UsingTemplate
);
190 llvm_unreachable("Unhandled NameKind enum");
192 std::string
getKind(const Attr
*A
) {
193 switch (A
->getKind()) {
197 #include "clang/Basic/AttrList.inc"
200 llvm_unreachable("Unhandled attr::Kind enum");
202 std::string
getKind(const CXXBaseSpecifier
&CBS
) {
203 // There aren't really any variants of CXXBaseSpecifier.
204 // To avoid special cases in the API/UI, use public/private as the kind.
205 return getAccessSpelling(CBS
.getAccessSpecifier()).str();
208 // Detail is the single most important fact about the node.
209 // Often this is the name, sometimes a "kind" enum like operators or casts.
210 // We should avoid unbounded text, like dumping parameter lists.
212 std::string
getDetail(const Decl
*D
) {
213 const auto *ND
= dyn_cast
<NamedDecl
>(D
);
214 if (!ND
|| llvm::isa_and_nonnull
<CXXConstructorDecl
>(ND
->getAsFunction()) ||
215 isa
<CXXDestructorDecl
>(ND
))
217 std::string Name
= toString([&](raw_ostream
&OS
) { ND
->printName(OS
); });
219 return "(anonymous)";
222 std::string
getDetail(const Stmt
*S
) {
223 if (const auto *DRE
= dyn_cast
<DeclRefExpr
>(S
))
224 return DRE
->getNameInfo().getAsString();
225 if (const auto *DSDRE
= dyn_cast
<DependentScopeDeclRefExpr
>(S
))
226 return DSDRE
->getNameInfo().getAsString();
227 if (const auto *ME
= dyn_cast
<MemberExpr
>(S
))
228 return ME
->getMemberNameInfo().getAsString();
229 if (const auto *CE
= dyn_cast
<CastExpr
>(S
))
230 return CE
->getCastKindName();
231 if (const auto *BO
= dyn_cast
<BinaryOperator
>(S
))
232 return BO
->getOpcodeStr().str();
233 if (const auto *UO
= dyn_cast
<UnaryOperator
>(S
))
234 return UnaryOperator::getOpcodeStr(UO
->getOpcode()).str();
235 if (const auto *CCO
= dyn_cast
<CXXConstructExpr
>(S
))
236 return CCO
->getConstructor()->getNameAsString();
237 if (const auto *CTE
= dyn_cast
<CXXThisExpr
>(S
)) {
238 bool Const
= CTE
->getType()->getPointeeType().isLocalConstQualified();
239 if (CTE
->isImplicit())
240 return Const
? "const, implicit" : "implicit";
245 if (isa
<IntegerLiteral
, FloatingLiteral
, FixedPointLiteral
,
246 CharacterLiteral
, ImaginaryLiteral
, CXXBoolLiteralExpr
>(S
))
247 return toString([&](raw_ostream
&OS
) {
248 S
->printPretty(OS
, nullptr, Ctx
.getPrintingPolicy());
250 if (const auto *MTE
= dyn_cast
<MaterializeTemporaryExpr
>(S
))
251 return MTE
->isBoundToLvalueReference() ? "lvalue" : "rvalue";
254 std::string
getDetail(const TypeLoc
&TL
) {
255 if (TL
.getType().hasLocalQualifiers())
256 return TL
.getType().getLocalQualifiers().getAsString(
257 Ctx
.getPrintingPolicy());
258 if (const auto *TT
= dyn_cast
<TagType
>(TL
.getTypePtr()))
259 return getDetail(TT
->getDecl());
260 if (const auto *DT
= dyn_cast
<DeducedType
>(TL
.getTypePtr()))
262 return DT
->getDeducedType().getAsString(Ctx
.getPrintingPolicy());
263 if (const auto *BT
= dyn_cast
<BuiltinType
>(TL
.getTypePtr()))
264 return BT
->getName(Ctx
.getPrintingPolicy()).str();
265 if (const auto *TTPT
= dyn_cast
<TemplateTypeParmType
>(TL
.getTypePtr()))
266 return getDetail(TTPT
->getDecl());
267 if (const auto *TT
= dyn_cast
<TypedefType
>(TL
.getTypePtr()))
268 return getDetail(TT
->getDecl());
271 std::string
getDetail(const NestedNameSpecifierLoc
&NNSL
) {
272 const auto &NNS
= *NNSL
.getNestedNameSpecifier();
273 switch (NNS
.getKind()) {
274 case NestedNameSpecifier::Identifier
:
275 return NNS
.getAsIdentifier()->getName().str() + "::";
276 case NestedNameSpecifier::Namespace
:
277 return NNS
.getAsNamespace()->getNameAsString() + "::";
278 case NestedNameSpecifier::NamespaceAlias
:
279 return NNS
.getAsNamespaceAlias()->getNameAsString() + "::";
284 std::string
getDetail(const CXXCtorInitializer
*CCI
) {
285 if (FieldDecl
*FD
= CCI
->getAnyMember())
286 return getDetail(FD
);
287 if (TypeLoc TL
= CCI
->getBaseClassLoc())
288 return getDetail(TL
);
291 std::string
getDetail(const TemplateArgumentLoc
&TAL
) {
292 if (TAL
.getArgument().getKind() == TemplateArgument::Integral
)
293 return toString(TAL
.getArgument().getAsIntegral(), 10);
296 std::string
getDetail(const TemplateName
&TN
) {
297 return toString([&](raw_ostream
&OS
) {
298 TN
.print(OS
, Ctx
.getPrintingPolicy(), TemplateName::Qualified::None
);
301 std::string
getDetail(const Attr
*A
) {
302 return A
->getAttrName() ? A
->getNormalizedFullName() : A
->getSpelling();
304 std::string
getDetail(const CXXBaseSpecifier
&CBS
) {
305 return CBS
.isVirtual() ? "virtual" : "";
308 /// Arcana is produced by TextNodeDumper, for the types it supports.
310 template <typename Dump
> std::string
dump(const Dump
&D
) {
311 return toString([&](raw_ostream
&OS
) {
312 TextNodeDumper
Dumper(OS
, Ctx
, /*ShowColors=*/false);
316 template <typename T
> std::string
getArcana(const T
&N
) {
317 return dump([&](TextNodeDumper
&D
) { D
.Visit(N
); });
319 std::string
getArcana(const NestedNameSpecifierLoc
&NNS
) { return ""; }
320 std::string
getArcana(const TemplateName
&NNS
) { return ""; }
321 std::string
getArcana(const CXXBaseSpecifier
&CBS
) { return ""; }
322 std::string
getArcana(const TemplateArgumentLoc
&TAL
) {
323 return dump([&](TextNodeDumper
&D
) {
324 D
.Visit(TAL
.getArgument(), TAL
.getSourceRange());
327 std::string
getArcana(const TypeLoc
&TL
) {
328 return dump([&](TextNodeDumper
&D
) { D
.Visit(TL
.getType()); });
333 DumpVisitor(const syntax::TokenBuffer
&Tokens
, const ASTContext
&Ctx
)
334 : Tokens(Tokens
), Ctx(Ctx
) {}
336 // Override traversal to record the nodes we care about.
337 // Generally, these are nodes with position information (TypeLoc, not Type).
339 bool TraverseDecl(Decl
*D
) {
340 return !D
|| isInjectedClassName(D
) ||
341 traverseNode("declaration", D
, [&] { Base::TraverseDecl(D
); });
343 bool TraverseTypeLoc(TypeLoc TL
) {
344 return !TL
|| traverseNode("type", TL
, [&] { Base::TraverseTypeLoc(TL
); });
346 bool TraverseTemplateName(const TemplateName
&TN
) {
347 return traverseNode("template name", TN
,
348 [&] { Base::TraverseTemplateName(TN
); });
350 bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc
&TAL
) {
351 return traverseNode("template argument", TAL
,
352 [&] { Base::TraverseTemplateArgumentLoc(TAL
); });
354 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNSL
) {
355 return !NNSL
|| traverseNode("specifier", NNSL
, [&] {
356 Base::TraverseNestedNameSpecifierLoc(NNSL
);
359 bool TraverseConstructorInitializer(CXXCtorInitializer
*CCI
) {
360 return !CCI
|| traverseNode("constructor initializer", CCI
, [&] {
361 Base::TraverseConstructorInitializer(CCI
);
364 bool TraverseAttr(Attr
*A
) {
365 return !A
|| traverseNode("attribute", A
, [&] { Base::TraverseAttr(A
); });
367 bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier
&CBS
) {
368 return traverseNode("base", CBS
,
369 [&] { Base::TraverseCXXBaseSpecifier(CBS
); });
371 // Stmt is the same, but this form allows the data recursion optimization.
372 bool dataTraverseStmtPre(Stmt
*S
) {
373 return S
&& traverseNodePre(isa
<Expr
>(S
) ? "expression" : "statement", S
);
375 bool dataTraverseStmtPost(Stmt
*X
) { return traverseNodePost(); }
377 // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
378 // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
379 // This means we'd never see 'int' in 'const int'! Work around that here.
380 // (The reason for the behavior is to avoid traversing the nested Type twice,
381 // but we ignore TraverseType anyway).
382 bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QTL
) {
383 return TraverseTypeLoc(QTL
.getUnqualifiedLoc());
385 // Uninteresting parts of the AST that don't have locations within them.
386 bool TraverseNestedNameSpecifier(NestedNameSpecifier
*) { return true; }
387 bool TraverseType(QualType
) { return true; }
389 // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
390 bool TraverseOpaqueValueExpr(OpaqueValueExpr
*E
) {
391 return TraverseStmt(E
->getSourceExpr());
393 // We only want to traverse the *syntactic form* to understand the selection.
394 bool TraversePseudoObjectExpr(PseudoObjectExpr
*E
) {
395 return TraverseStmt(E
->getSyntacticForm());
401 ASTNode
dumpAST(const DynTypedNode
&N
, const syntax::TokenBuffer
&Tokens
,
402 const ASTContext
&Ctx
) {
403 DumpVisitor
V(Tokens
, Ctx
);
404 // DynTypedNode only works with const, RecursiveASTVisitor only non-const :-(
405 if (const auto *D
= N
.get
<Decl
>())
406 V
.TraverseDecl(const_cast<Decl
*>(D
));
407 else if (const auto *S
= N
.get
<Stmt
>())
408 V
.TraverseStmt(const_cast<Stmt
*>(S
));
409 else if (const auto *NNSL
= N
.get
<NestedNameSpecifierLoc
>())
410 V
.TraverseNestedNameSpecifierLoc(
411 *const_cast<NestedNameSpecifierLoc
*>(NNSL
));
412 else if (const auto *NNS
= N
.get
<NestedNameSpecifier
>())
413 V
.TraverseNestedNameSpecifier(const_cast<NestedNameSpecifier
*>(NNS
));
414 else if (const auto *TL
= N
.get
<TypeLoc
>())
415 V
.TraverseTypeLoc(*const_cast<TypeLoc
*>(TL
));
416 else if (const auto *QT
= N
.get
<QualType
>())
417 V
.TraverseType(*const_cast<QualType
*>(QT
));
418 else if (const auto *CCI
= N
.get
<CXXCtorInitializer
>())
419 V
.TraverseConstructorInitializer(const_cast<CXXCtorInitializer
*>(CCI
));
420 else if (const auto *TAL
= N
.get
<TemplateArgumentLoc
>())
421 V
.TraverseTemplateArgumentLoc(*const_cast<TemplateArgumentLoc
*>(TAL
));
422 else if (const auto *CBS
= N
.get
<CXXBaseSpecifier
>())
423 V
.TraverseCXXBaseSpecifier(*const_cast<CXXBaseSpecifier
*>(CBS
));
425 elog("dumpAST: unhandled DynTypedNode kind {0}",
426 N
.getNodeKind().asStringRef());
427 return std::move(V
.Root
);
430 } // namespace clangd