1 //===--- DumpAST.cpp - Serialize clang AST to LSP -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "SourceCode.h"
12 #include "support/Logger.h"
13 #include "clang/AST/ASTTypeTraits.h"
14 #include "clang/AST/Expr.h"
15 #include "clang/AST/ExprCXX.h"
16 #include "clang/AST/NestedNameSpecifier.h"
17 #include "clang/AST/PrettyPrinter.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/AST/TextNodeDumper.h"
20 #include "clang/AST/Type.h"
21 #include "clang/AST/TypeLoc.h"
22 #include "clang/Basic/Specifiers.h"
23 #include "clang/Tooling/Syntax/Tokens.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/raw_ostream.h"
32 using llvm::raw_ostream
;
// Produces a std::string by writing through an llvm::raw_string_ostream that
// is backed by Result, then returning the stream's contents.
// NOTE(review): the declaration of Result and any use of the callback C are
// not visible in this excerpt; presumably C is invoked with OS before the
// return -- confirm against the full file.
33 template <typename Print
> std::string
toString(const Print
&C
) {
35 llvm::raw_string_ostream
OS(Result
);
37 return std::move(OS
.str());
// Reports whether D is an injected class name. When D is a CXXRecordDecl the
// answer comes from CXXRecordDecl::isInjectedClassName().
// NOTE(review): the result for every other declaration kind is on a line not
// visible here (presumably false) -- confirm.
40 bool isInjectedClassName(Decl
*D
) {
41 if (const auto *CRD
= llvm::dyn_cast
<CXXRecordDecl
>(D
))
42 return CRD
->isInjectedClassName();
// AST visitor built on RecursiveASTVisitor (instantiated with DumpVisitor
// itself as the derived class). It holds the state shared by the traversal
// helpers and get* overloads below.
46 class DumpVisitor
: public RecursiveASTVisitor
<DumpVisitor
> {
// Shorthand for the base visitor; the Traverse* overrides call Base::... to
// run the default traversal.
47 using Base
= RecursiveASTVisitor
<DumpVisitor
>;
// Token buffer used by getRange() to map a node's source range back to the
// spelled tokens.
49 const syntax::TokenBuffer
&Tokens
;
// AST context: supplies the printing policy and is handed to TextNodeDumper.
50 const ASTContext
&Ctx
;
// Nodes currently being populated; traverseNodePre() fills in Stack.back().
52 // Pointers are into 'children' vector.
53 // They remain valid because while a node is on the stack we only add
54 // descendants, not siblings.
55 std::vector
<ASTNode
*> Stack
;
57 // Generic logic used to handle traversal of all node kinds.
// Pre-visit step: makes a node current by pushing it on Stack -- either Root
// itself (which must not have a role yet) or a new child appended to the
// current top's children -- and then fills in its kind, detail, range and
// arcana from the get* overload matching Node's type.
// NOTE(review): the template header, the condition that chooses between the
// Root path and the child path, any use of Role, and the return statement are
// not visible in this excerpt.
60 bool traverseNodePre(llvm::StringRef Role
, const T
&Node
) {
62 assert(Root
.role
.empty());
63 Stack
.push_back(&Root
);
65 Stack
.back()->children
.emplace_back();
66 Stack
.push_back(&Stack
.back()->children
.back());
// N is the node that was just pushed; populate it from Node.
68 auto &N
= *Stack
.back();
70 N
.kind
= getKind(Node
);
71 N
.detail
= getDetail(Node
);
72 N
.range
= getRange(Node
);
73 N
.arcana
= getArcana(Node
);
// Post-visit step paired with traverseNodePre(). Only the precondition is
// visible here: Stack must be non-empty when this is called.
// NOTE(review): presumably this pops the node pushed by traverseNodePre() and
// returns true -- the rest of the body is not visible, confirm.
76 bool traverseNodePost() {
77 assert(!Stack
.empty());
// Wraps a traversal in the pre/post bookkeeping: calls
// traverseNodePre(Role, Node) first and returns the result of
// traverseNodePost().
// NOTE(review): no use of Body is visible in this excerpt; presumably Body()
// is invoked between the two calls to visit the node's children -- confirm.
81 template <typename T
, typename Callable
>
82 bool traverseNode(llvm::StringRef Role
, const T
&Node
, const Callable
&Body
) {
83 traverseNodePre(Role
, Node
);
85 return traverseNodePost();
88 // Range: most nodes have getSourceRange(), with a couple of exceptions.
89 // We only return it if it's valid at both ends and there are no macros.
// Steps visible here: obtain the node's SourceRange through the
// getSourceRange() overloads, map the expanded tokens it covers to spelled
// tokens, and convert the span from the first spelled token's start to the
// last spelled token's end into a Range.
// NOTE(review): Spelled is dereferenced in the return statement, so the
// check for a failed mapping is presumably on the lines missing from this
// excerpt -- confirm.
91 template <typename T
> std::optional
<Range
> getRange(const T
&Node
) {
92 SourceRange SR
= getSourceRange(Node
);
93 auto Spelled
= Tokens
.spelledForExpanded(Tokens
.expandedTokens(SR
));
// The character range runs from the start of the first spelled token to the
// end location of the last one.
96 return halfOpenToRange(
97 Tokens
.sourceManager(),
98 CharSourceRange::getCharRange(Spelled
->front().location(),
99 Spelled
->back().endLocation()));
// Generic case for nodes handled by reference: forwards to the node's own
// getSourceRange(). The defaulted, unnamed template parameter only compiles
// when T has such a member, which removes this overload for other types.
101 template <typename T
, typename
= decltype(std::declval
<T
>().getSourceRange())>
102 SourceRange
getSourceRange(const T
&Node
) {
103 return Node
.getSourceRange();
// Generic case for nodes handled by pointer: forwards to
// Node->getSourceRange(). The defaulted, unnamed template parameter only
// compiles when T has such a member, which removes this overload for other
// types.
105 template <typename T
,
106 typename
= decltype(std::declval
<T
*>()->getSourceRange())>
107 SourceRange
getSourceRange(const T
*Node
) {
108 return Node
->getSourceRange();
110 // TemplateName doesn't have a real Loc node type.
111 SourceRange
getSourceRange(const TemplateName
&Node
) { return SourceRange(); }
112 // Attr just uses a weird method name. Maybe we should fix it instead?
113 SourceRange
getSourceRange(const Attr
*Node
) { return Node
->getRange(); }
115 // Kind is usually the class name, without the suffix ("Type" etc).
116 // Where there's a set of variants instead, we use the 'Kind' enum values.
118 std::string
getKind(const Decl
*D
) { return D
->getDeclKindName(); }
// Kind of a statement: its statement class name with a trailing "Stmt" or
// "Expr" removed.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably Result is returned.
119 std::string
getKind(const Stmt
*S
) {
120 std::string Result
= S
->getStmtClassName();
121 if (llvm::StringRef(Result
).ends_with("Stmt") ||
122 llvm::StringRef(Result
).ends_with("Expr"))
// Both suffixes are four characters long, hence the fixed amount.
123 Result
.resize(Result
.size() - 4);
// Kind of a type location. TypeLocs of class Qualified are tested for
// explicitly; otherwise the kind is the class name of the underlying type.
// NOTE(review): the statement executed for the Qualified case sits on a line
// missing from this excerpt -- confirm what it returns.
126 std::string
getKind(const TypeLoc
&TL
) {
128 if (TL
.getTypeLocClass() == TypeLoc::Qualified
)
130 return TL
.getType()->getTypeClassName();
// Kind of a template argument: switches over the TemplateArgument kind, with
// one case per enumerator generated by the local TEMPLATE_ARGUMENT_KIND
// macro. llvm_unreachable() follows the enumerated cases.
// NOTE(review): the last line of the macro body is missing from this
// excerpt; presumably each case returns the enumerator's name -- confirm.
132 std::string
getKind(const TemplateArgumentLoc
&TAL
) {
133 switch (TAL
.getArgument().getKind()) {
134 #define TEMPLATE_ARGUMENT_KIND(X) \
135 case TemplateArgument::X: \
137 TEMPLATE_ARGUMENT_KIND(Null
);
138 TEMPLATE_ARGUMENT_KIND(NullPtr
);
139 TEMPLATE_ARGUMENT_KIND(Expression
);
140 TEMPLATE_ARGUMENT_KIND(Integral
);
141 TEMPLATE_ARGUMENT_KIND(Pack
);
142 TEMPLATE_ARGUMENT_KIND(Type
);
143 TEMPLATE_ARGUMENT_KIND(Declaration
);
144 TEMPLATE_ARGUMENT_KIND(Template
);
145 TEMPLATE_ARGUMENT_KIND(TemplateExpansion
);
146 TEMPLATE_ARGUMENT_KIND(StructuralValue
);
147 #undef TEMPLATE_ARGUMENT_KIND
149 llvm_unreachable("Unhandled ArgKind enum");
// Kind of a nested-name-specifier: requires a non-null specifier, then
// switches over its kind using cases generated by the local NNS_KIND macro.
// llvm_unreachable() follows the enumerated cases.
// NOTE(review): the macro body's last line, several NNS_KIND(...) uses and
// the #undef are missing from this excerpt; presumably each case returns the
// enumerator's name -- confirm.
151 std::string
getKind(const NestedNameSpecifierLoc
&NNSL
) {
152 assert(NNSL
.getNestedNameSpecifier());
153 switch (NNSL
.getNestedNameSpecifier()->getKind()) {
154 #define NNS_KIND(X) \
155 case NestedNameSpecifier::X: \
157 NNS_KIND(Identifier
);
160 NNS_KIND(TypeSpecWithTemplate
);
163 NNS_KIND(NamespaceAlias
);
166 llvm_unreachable("Unhandled SpecifierKind enum");
// Kind of a constructor initializer: "BaseInitializer",
// "DelegatingInitializer" or "MemberInitializer", tested in that order.
// An initializer matching none of the three predicates is treated as
// unreachable.
168 std::string
getKind(const CXXCtorInitializer
*CCI
) {
169 if (CCI
->isBaseInitializer())
170 return "BaseInitializer";
171 if (CCI
->isDelegatingInitializer())
172 return "DelegatingInitializer";
173 if (CCI
->isAnyMemberInitializer())
174 return "MemberInitializer";
175 llvm_unreachable("Unhandled CXXCtorInitializer type");
// Kind of a template name: switches over TemplateName::getKind(), with one
// case per enumerator generated by the local TEMPLATE_KIND macro.
// llvm_unreachable() follows the enumerated cases.
// NOTE(review): the last line of the macro body and the #undef are missing
// from this excerpt; presumably each case returns the enumerator's name --
// confirm.
177 std::string
getKind(const TemplateName
&TN
) {
178 switch (TN
.getKind()) {
179 #define TEMPLATE_KIND(X) \
180 case TemplateName::X: \
182 TEMPLATE_KIND(Template
);
183 TEMPLATE_KIND(OverloadedTemplate
);
184 TEMPLATE_KIND(AssumedTemplate
);
185 TEMPLATE_KIND(QualifiedTemplate
);
186 TEMPLATE_KIND(DependentTemplate
);
187 TEMPLATE_KIND(SubstTemplateTemplateParm
);
188 TEMPLATE_KIND(SubstTemplateTemplateParmPack
);
189 TEMPLATE_KIND(UsingTemplate
);
192 llvm_unreachable("Unhandled NameKind enum");
// Kind of an attribute: switches over the attribute's kind; the cases come
// from including clang/Basic/AttrList.inc inside the switch.
// llvm_unreachable() follows the switch.
// NOTE(review): the macro definitions that shape the included list are
// missing from this excerpt, so what each case returns cannot be confirmed
// from here.
194 std::string
getKind(const Attr
*A
) {
195 switch (A
->getKind()) {
199 #include "clang/Basic/AttrList.inc"
202 llvm_unreachable("Unhandled attr::Kind enum");
// Kind of a base specifier: the spelling of its access specifier, as
// produced by getAccessSpelling().
204 std::string
getKind(const CXXBaseSpecifier
&CBS
) {
205 // There aren't really any variants of CXXBaseSpecifier.
206 // To avoid special cases in the API/UI, use public/private as the kind.
207 return getAccessSpelling(CBS
.getAccessSpecifier()).str();
// Kind of a concept reference.
// NOTE(review): the return statement is missing from this excerpt; going by
// the comment below it presumably returns the fixed string "Concept" --
// confirm.
209 std::string
getKind(const ConceptReference
*CR
) {
210 // Again there are no variants here.
211 // Kind is "Concept", role is "reference"
215 // Detail is the single most important fact about the node.
216 // Often this is the name, sometimes a "kind" enum like operators or casts.
217 // We should avoid unbounded text, like dumping parameter lists.
// Detail of a declaration: based on the name printed by
// NamedDecl::printName(), with "(anonymous)" as a placeholder.
// Declarations that are not NamedDecls, those whose getAsFunction() is a
// CXXConstructorDecl, and destructors are singled out before the name is
// computed.
// NOTE(review): the value returned for the singled-out case, the condition
// guarding "(anonymous)" and the final return are on lines missing from this
// excerpt (presumably "", Name.empty() and Name respectively) -- confirm.
219 std::string
getDetail(const Decl
*D
) {
220 const auto *ND
= dyn_cast
<NamedDecl
>(D
);
221 if (!ND
|| llvm::isa_and_nonnull
<CXXConstructorDecl
>(ND
->getAsFunction()) ||
222 isa
<CXXDestructorDecl
>(ND
))
224 std::string Name
= toString([&](raw_ostream
&OS
) { ND
->printName(OS
); });
226 return "(anonymous)";
// Detail of a statement or expression, chosen by node type in this order:
//   - DeclRefExpr / DependentScopeDeclRefExpr / MemberExpr: the referenced
//     name.
//   - CastExpr: the cast kind name.
//   - BinaryOperator / UnaryOperator: the operator spelling.
//   - CXXConstructExpr: the constructor's name.
//   - CXXThisExpr: whether `this` is implicit and/or points to const.
//   - Literals: the pretty-printed literal.
//   - MaterializeTemporaryExpr: "lvalue" or "rvalue", depending on the
//     reference it is bound to.
// NOTE(review): the explicit (non-implicit) CXXThisExpr results, the end of
// the literal-printing lambda and the fallback return are on lines missing
// from this excerpt.
229 std::string
getDetail(const Stmt
*S
) {
// Names of referenced entities.
230 if (const auto *DRE
= dyn_cast
<DeclRefExpr
>(S
))
231 return DRE
->getNameInfo().getAsString();
232 if (const auto *DSDRE
= dyn_cast
<DependentScopeDeclRefExpr
>(S
))
233 return DSDRE
->getNameInfo().getAsString();
234 if (const auto *ME
= dyn_cast
<MemberExpr
>(S
))
235 return ME
->getMemberNameInfo().getAsString();
// "Kind"-style details: cast kind and operator spellings.
236 if (const auto *CE
= dyn_cast
<CastExpr
>(S
))
237 return CE
->getCastKindName();
238 if (const auto *BO
= dyn_cast
<BinaryOperator
>(S
))
239 return BO
->getOpcodeStr().str();
240 if (const auto *UO
= dyn_cast
<UnaryOperator
>(S
))
241 return UnaryOperator::getOpcodeStr(UO
->getOpcode()).str();
242 if (const auto *CCO
= dyn_cast
<CXXConstructExpr
>(S
))
243 return CCO
->getConstructor()->getNameAsString();
244 if (const auto *CTE
= dyn_cast
<CXXThisExpr
>(S
)) {
// Const reflects the const-qualification of the pointee of `this`.
245 bool Const
= CTE
->getType()->getPointeeType().isLocalConstQualified();
246 if (CTE
->isImplicit())
247 return Const
? "const, implicit" : "implicit";
// Literal kinds are rendered with the context's printing policy.
252 if (isa
<IntegerLiteral
, FloatingLiteral
, FixedPointLiteral
,
253 CharacterLiteral
, ImaginaryLiteral
, CXXBoolLiteralExpr
>(S
))
254 return toString([&](raw_ostream
&OS
) {
255 S
->printPretty(OS
, nullptr, Ctx
.getPrintingPolicy());
257 if (const auto *MTE
= dyn_cast
<MaterializeTemporaryExpr
>(S
))
258 return MTE
->isBoundToLvalueReference() ? "lvalue" : "rvalue";
// Detail of a type location, chosen in this order:
//   - locally qualified types: the qualifiers, printed with the context's
//     printing policy;
//   - TagType / TemplateTypeParmType / TypedefType: the detail of the
//     corresponding declaration;
//   - DeducedType: the deduced type, printed with the printing policy;
//   - BuiltinType: the builtin's name under the printing policy.
// NOTE(review): a line between the DeducedType test and its return is
// missing from this excerpt (presumably a guard on the deduced type), as is
// the fallback return -- confirm.
261 std::string
getDetail(const TypeLoc
&TL
) {
262 if (TL
.getType().hasLocalQualifiers())
263 return TL
.getType().getLocalQualifiers().getAsString(
264 Ctx
.getPrintingPolicy());
265 if (const auto *TT
= dyn_cast
<TagType
>(TL
.getTypePtr()))
266 return getDetail(TT
->getDecl());
267 if (const auto *DT
= dyn_cast
<DeducedType
>(TL
.getTypePtr()))
269 return DT
->getDeducedType().getAsString(Ctx
.getPrintingPolicy());
270 if (const auto *BT
= dyn_cast
<BuiltinType
>(TL
.getTypePtr()))
271 return BT
->getName(Ctx
.getPrintingPolicy()).str();
272 if (const auto *TTPT
= dyn_cast
<TemplateTypeParmType
>(TL
.getTypePtr()))
273 return getDetail(TTPT
->getDecl());
274 if (const auto *TT
= dyn_cast
<TypedefType
>(TL
.getTypePtr()))
275 return getDetail(TT
->getDecl());
// Detail of a nested-name-specifier: for identifier, namespace and
// namespace-alias specifiers, the name followed by "::".
// The specifier is dereferenced unconditionally, so NNSL must hold a
// non-null NestedNameSpecifier.
// NOTE(review): handling of the remaining specifier kinds is on lines
// missing from this excerpt.
278 std::string
getDetail(const NestedNameSpecifierLoc
&NNSL
) {
279 const auto &NNS
= *NNSL
.getNestedNameSpecifier();
280 switch (NNS
.getKind()) {
281 case NestedNameSpecifier::Identifier
:
282 return NNS
.getAsIdentifier()->getName().str() + "::";
283 case NestedNameSpecifier::Namespace
:
284 return NNS
.getAsNamespace()->getNameAsString() + "::";
285 case NestedNameSpecifier::NamespaceAlias
:
286 return NNS
.getAsNamespaceAlias()->getNameAsString() + "::";
// Detail of a constructor initializer: the detail of the initialized member
// when getAnyMember() yields one, otherwise the detail of the base class
// TypeLoc when that is non-null.
// NOTE(review): the fallback when neither applies is on a line missing from
// this excerpt.
291 std::string
getDetail(const CXXCtorInitializer
*CCI
) {
292 if (FieldDecl
*FD
= CCI
->getAnyMember())
293 return getDetail(FD
);
294 if (TypeLoc TL
= CCI
->getBaseClassLoc())
295 return getDetail(TL
);
// Detail of a template argument: integral arguments are rendered as a
// number, passing 10 as the second argument (presumably the radix).
// This uses a two-argument toString overload, not the single-argument
// callback-based helper defined at the top of this file.
// NOTE(review): the result for non-integral arguments is on a line missing
// from this excerpt.
298 std::string
getDetail(const TemplateArgumentLoc
&TAL
) {
299 if (TAL
.getArgument().getKind() == TemplateArgument::Integral
)
300 return toString(TAL
.getArgument().getAsIntegral(), 10);
// Detail of a template name: the name printed with the context's printing
// policy and TemplateName::Qualified::None.
303 std::string
getDetail(const TemplateName
&TN
) {
304 return toString([&](raw_ostream
&OS
) {
305 TN
.print(OS
, Ctx
.getPrintingPolicy(), TemplateName::Qualified::None
);
// Detail of an attribute: its normalized full name when getAttrName() is
// non-null, otherwise its spelling.
308 std::string
getDetail(const Attr
*A
) {
309 return A
->getAttrName() ? A
->getNormalizedFullName() : A
->getSpelling();
// Detail of a base specifier: "virtual" for virtual bases, empty otherwise.
311 std::string
getDetail(const CXXBaseSpecifier
&CBS
) {
312 return CBS
.isVirtual() ? "virtual" : "";
// Detail of a concept reference: the name of the concept it refers to.
314 std::string
getDetail(const ConceptReference
*CR
) {
315 return CR
->getNamedConcept()->getNameAsString();
318 /// Arcana is produced by TextNodeDumper, for the types it supports.
// Shared helper for the getArcana() overloads: builds a TextNodeDumper that
// writes to a string-backed stream (with colors disabled) and returns the
// resulting text.
// NOTE(review): the use of the callback D is on a line missing from this
// excerpt; presumably D(Dumper) is invoked inside the lambda -- confirm.
320 template <typename Dump
> std::string
dump(const Dump
&D
) {
321 return toString([&](raw_ostream
&OS
) {
322 TextNodeDumper
Dumper(OS
, Ctx
, /*ShowColors=*/false);
// Default arcana: whatever TextNodeDumper::Visit() prints for the node.
// The non-template getArcana() overloads cover node types handled
// differently.
326 template <typename T
> std::string
getArcana(const T
&N
) {
327 return dump([&](TextNodeDumper
&D
) { D
.Visit(N
); });
329 std::string
getArcana(const NestedNameSpecifierLoc
&NNS
) { return ""; }
330 std::string
getArcana(const TemplateName
&NNS
) { return ""; }
331 std::string
getArcana(const CXXBaseSpecifier
&CBS
) { return ""; }
// Arcana of a template argument: TextNodeDumper is given the underlying
// TemplateArgument together with the source range of the TemplateArgumentLoc.
332 std::string
getArcana(const TemplateArgumentLoc
&TAL
) {
333 return dump([&](TextNodeDumper
&D
) {
334 D
.Visit(TAL
.getArgument(), TAL
.getSourceRange());
// Arcana of a type location: TextNodeDumper is given the TypeLoc's type
// (TL.getType()), not the TypeLoc itself.
337 std::string
getArcana(const TypeLoc
&TL
) {
338 return dump([&](TextNodeDumper
&D
) { D
.Visit(TL
.getType()); });
343 DumpVisitor(const syntax::TokenBuffer
&Tokens
, const ASTContext
&Ctx
)
344 : Tokens(Tokens
), Ctx(Ctx
) {}
346 // Override traversal to record the nodes we care about.
347 // Generally, these are nodes with position information (TypeLoc, not Type).
// Records a "declaration" node for D and runs the default traversal inside
// it. Null declarations and injected class names are skipped and reported as
// success. The result of Base::TraverseDecl() is not propagated.
349 bool TraverseDecl(Decl
*D
) {
350 return !D
|| isInjectedClassName(D
) ||
351 traverseNode("declaration", D
, [&] { Base::TraverseDecl(D
); });
// Records a "type" node for TL and runs the default traversal inside it.
// A null TypeLoc is skipped and reported as success. The result of
// Base::TraverseTypeLoc() is not propagated.
353 bool TraverseTypeLoc(TypeLoc TL
) {
354 return !TL
|| traverseNode("type", TL
, [&] { Base::TraverseTypeLoc(TL
); });
// Records a "template name" node for TN and runs the default traversal
// inside it. The result of Base::TraverseTemplateName() is not propagated.
356 bool TraverseTemplateName(const TemplateName
&TN
) {
357 return traverseNode("template name", TN
,
358 [&] { Base::TraverseTemplateName(TN
); });
// Records a "template argument" node for TAL and runs the default traversal
// inside it. The result of Base::TraverseTemplateArgumentLoc() is not
// propagated.
360 bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc
&TAL
) {
361 return traverseNode("template argument", TAL
,
362 [&] { Base::TraverseTemplateArgumentLoc(TAL
); });
// Records a "specifier" node for NNSL and runs the default traversal inside
// it. A null NestedNameSpecifierLoc is skipped and reported as success.
// The result of Base::TraverseNestedNameSpecifierLoc() is not propagated.
364 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNSL
) {
365 return !NNSL
|| traverseNode("specifier", NNSL
, [&] {
366 Base::TraverseNestedNameSpecifierLoc(NNSL
);
// Records a "constructor initializer" node for CCI and runs the default
// traversal inside it. A null initializer is skipped and reported as success.
// The result of Base::TraverseConstructorInitializer() is not propagated.
369 bool TraverseConstructorInitializer(CXXCtorInitializer
*CCI
) {
370 return !CCI
|| traverseNode("constructor initializer", CCI
, [&] {
371 Base::TraverseConstructorInitializer(CCI
);
// Records an "attribute" node for A and runs the default traversal inside
// it. A null attribute is skipped and reported as success. The result of
// Base::TraverseAttr() is not propagated.
374 bool TraverseAttr(Attr
*A
) {
375 return !A
|| traverseNode("attribute", A
, [&] { Base::TraverseAttr(A
); });
// Records a "reference" node for the concept reference C and runs the
// default traversal inside it. A null reference is skipped and reported as
// success. The result of Base::TraverseConceptReference() is not propagated.
377 bool TraverseConceptReference(ConceptReference
*C
) {
378 return !C
|| traverseNode("reference", C
,
379 [&] { Base::TraverseConceptReference(C
); });
// Records a "base" node for CBS and runs the default traversal inside it.
// The result of Base::TraverseCXXBaseSpecifier() is not propagated.
381 bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier
&CBS
) {
382 return traverseNode("base", CBS
,
383 [&] { Base::TraverseCXXBaseSpecifier(CBS
); });
385 // Stmt is the same, but this form allows the data recursion optimization.
// Pre-visit hook for statements: opens a node through traverseNodePre(),
// with role "expression" when S is an Expr and "statement" otherwise.
// Returns false for a null S without opening a node.
386 bool dataTraverseStmtPre(Stmt
*S
) {
387 return S
&& traverseNodePre(isa
<Expr
>(S
) ? "expression" : "statement", S
);
389 bool dataTraverseStmtPost(Stmt
*X
) { return traverseNodePost(); }
391 // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
392 // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
393 // This means we'd never see 'int' in 'const int'! Work around that here.
394 // (The reason for the behavior is to avoid traversing the nested Type twice,
395 // but we ignore TraverseType anyway).
// The workaround: send the unqualified inner TypeLoc through this class's
// own TraverseTypeLoc() so that it gets a node of its own.
396 bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QTL
) {
397 return TraverseTypeLoc(QTL
.getUnqualifiedLoc());
399 // Uninteresting parts of the AST that don't have locations within them.
400 bool TraverseNestedNameSpecifier(NestedNameSpecifier
*) { return true; }
401 bool TraverseType(QualType
) { return true; }
403 // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
// Continues the traversal at the expression's source expression
// (E->getSourceExpr()).
404 bool TraverseOpaqueValueExpr(OpaqueValueExpr
*E
) {
405 return TraverseStmt(E
->getSourceExpr());
407 // We only want to traverse the *syntactic form* to understand the selection.
// Continues the traversal at E->getSyntacticForm() only.
408 bool TraversePseudoObjectExpr(PseudoObjectExpr
*E
) {
409 return TraverseStmt(E
->getSyntacticForm());
// Entry point: dumps the subtree rooted at N into an ASTNode.
// Builds a DumpVisitor over Tokens and Ctx, dispatches to the Traverse*
// method matching the dynamic type stored in N, and returns the visitor's
// Root, moved out.
// The node kinds tried, in order, are: Decl, Stmt, NestedNameSpecifierLoc,
// NestedNameSpecifier, TypeLoc, QualType, CXXCtorInitializer,
// TemplateArgumentLoc, CXXBaseSpecifier and ConceptReference.
// The Traverse* return values are ignored.
// NOTE(review): the elog() call reports unhandled node kinds; the `else`
// that would attach it to the chain is on a line missing from this excerpt.
415 ASTNode
dumpAST(const DynTypedNode
&N
, const syntax::TokenBuffer
&Tokens
,
416 const ASTContext
&Ctx
) {
417 DumpVisitor
V(Tokens
, Ctx
);
418 // DynTypedNode only works with const, RecursiveASTVisitor only non-const :-(
419 if (const auto *D
= N
.get
<Decl
>())
420 V
.TraverseDecl(const_cast<Decl
*>(D
));
421 else if (const auto *S
= N
.get
<Stmt
>())
422 V
.TraverseStmt(const_cast<Stmt
*>(S
));
423 else if (const auto *NNSL
= N
.get
<NestedNameSpecifierLoc
>())
424 V
.TraverseNestedNameSpecifierLoc(
425 *const_cast<NestedNameSpecifierLoc
*>(NNSL
));
426 else if (const auto *NNS
= N
.get
<NestedNameSpecifier
>())
427 V
.TraverseNestedNameSpecifier(const_cast<NestedNameSpecifier
*>(NNS
));
428 else if (const auto *TL
= N
.get
<TypeLoc
>())
429 V
.TraverseTypeLoc(*const_cast<TypeLoc
*>(TL
));
430 else if (const auto *QT
= N
.get
<QualType
>())
431 V
.TraverseType(*const_cast<QualType
*>(QT
));
432 else if (const auto *CCI
= N
.get
<CXXCtorInitializer
>())
433 V
.TraverseConstructorInitializer(const_cast<CXXCtorInitializer
*>(CCI
));
434 else if (const auto *TAL
= N
.get
<TemplateArgumentLoc
>())
435 V
.TraverseTemplateArgumentLoc(*const_cast<TemplateArgumentLoc
*>(TAL
));
436 else if (const auto *CBS
= N
.get
<CXXBaseSpecifier
>())
437 V
.TraverseCXXBaseSpecifier(*const_cast<CXXBaseSpecifier
*>(CBS
));
438 else if (const auto *CR
= N
.get
<ConceptReference
>())
439 V
.TraverseConceptReference(const_cast<ConceptReference
*>(CR
));
441 elog("dumpAST: unhandled DynTypedNode kind {0}",
442 N
.getNodeKind().asStringRef());
// Root is a member of the local visitor, so it is moved out explicitly.
443 return std::move(V
.Root
);
446 } // namespace clangd