1 //===- ExtractAPI/Serialization/SymbolGraphSerializer.cpp -------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements the SymbolGraphSerializer.
12 //===----------------------------------------------------------------------===//
14 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
15 #include "clang/Basic/Version.h"
16 #include "clang/ExtractAPI/API.h"
17 #include "clang/ExtractAPI/DeclarationFragments.h"
18 #include "llvm/Support/JSON.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/VersionTuple.h"
21 #include <type_traits>
23 using namespace clang
;
24 using namespace clang::extractapi
;
26 using namespace llvm::json
;
30 /// Helper function to inject a JSON object \p Obj into another object \p Paren
31 /// at position \p Key.
32 void serializeObject(Object
&Paren
, StringRef Key
, Optional
<Object
> Obj
) {
34 Paren
[Key
] = std::move(Obj
.value());
37 /// Helper function to inject a JSON array \p Array into object \p Paren at
39 void serializeArray(Object
&Paren
, StringRef Key
, Optional
<Array
> Array
) {
41 Paren
[Key
] = std::move(Array
.value());
44 /// Serialize a \c VersionTuple \p V with the Symbol Graph semantic version
47 /// A semantic version object contains three numeric fields, representing the
48 /// \c major, \c minor, and \c patch parts of the version tuple.
49 /// For example version tuple 1.0.3 is serialized as:
58 /// \returns \c None if the version \p V is empty, or an \c Object containing
59 /// the semantic version representation of \p V.
60 Optional
<Object
> serializeSemanticVersion(const VersionTuple
&V
) {
65 Version
["major"] = V
.getMajor();
66 Version
["minor"] = V
.getMinor().value_or(0);
67 Version
["patch"] = V
.getSubminor().value_or(0);
71 /// Serialize the OS information in the Symbol Graph platform property.
73 /// The OS information in Symbol Graph contains the \c name of the OS, and an
74 /// optional \c minimumVersion semantic version field.
75 Object
serializeOperatingSystem(const Triple
&T
) {
77 OS
["name"] = T
.getOSTypeName(T
.getOS());
78 serializeObject(OS
, "minimumVersion",
79 serializeSemanticVersion(T
.getMinimumSupportedOSVersion()));
83 /// Serialize the platform information in the Symbol Graph module section.
85 /// The platform object describes a target platform triple in corresponding
86 /// three fields: \c architecture, \c vendor, and \c operatingSystem.
87 Object
serializePlatform(const Triple
&T
) {
89 Platform
["architecture"] = T
.getArchName();
90 Platform
["vendor"] = T
.getVendorName();
91 Platform
["operatingSystem"] = serializeOperatingSystem(T
);
95 /// Serialize a source position.
96 Object
serializeSourcePosition(const PresumedLoc
&Loc
) {
97 assert(Loc
.isValid() && "invalid source position");
99 Object SourcePosition
;
100 SourcePosition
["line"] = Loc
.getLine();
101 SourcePosition
["character"] = Loc
.getColumn();
103 return SourcePosition
;
106 /// Serialize a source location in file.
108 /// \param Loc The presumed location to serialize.
109 /// \param IncludeFileURI If true, include the file path of \p Loc as a URI.
110 /// Defaults to false.
111 Object
serializeSourceLocation(const PresumedLoc
&Loc
,
112 bool IncludeFileURI
= false) {
113 Object SourceLocation
;
114 serializeObject(SourceLocation
, "position", serializeSourcePosition(Loc
));
116 if (IncludeFileURI
) {
117 std::string FileURI
= "file://";
118 // Normalize file path to use forward slashes for the URI.
119 FileURI
+= sys::path::convert_to_slash(Loc
.getFilename());
120 SourceLocation
["uri"] = FileURI
;
123 return SourceLocation
;
126 /// Serialize a source range with begin and end locations.
127 Object
serializeSourceRange(const PresumedLoc
&BeginLoc
,
128 const PresumedLoc
&EndLoc
) {
130 serializeObject(SourceRange
, "start", serializeSourcePosition(BeginLoc
));
131 serializeObject(SourceRange
, "end", serializeSourcePosition(EndLoc
));
135 /// Serialize the availability attributes of a symbol.
137 /// Availability information contains the introduced, deprecated, and obsoleted
138 /// versions of the symbol for a given domain (roughly corresponds to a
139 /// platform) as semantic versions, if not default. Availability information
140 /// also contains flags to indicate if the symbol is unconditionally unavailable
141 /// or deprecated, i.e. \c __attribute__((unavailable)) and \c
142 /// __attribute__((deprecated)).
144 /// \returns \c None if the symbol has default availability attributes, or
145 /// an \c Array containing the formatted availability information.
146 Optional
<Array
> serializeAvailability(const AvailabilitySet
&Availabilities
) {
147 if (Availabilities
.isDefault())
150 Array AvailabilityArray
;
152 if (Availabilities
.isUnconditionallyDeprecated()) {
153 Object UnconditionallyDeprecated
;
154 UnconditionallyDeprecated
["domain"] = "*";
155 UnconditionallyDeprecated
["isUnconditionallyDeprecated"] = true;
156 AvailabilityArray
.emplace_back(std::move(UnconditionallyDeprecated
));
159 // Note unconditionally unavailable records are skipped.
161 for (const auto &AvailInfo
: Availabilities
) {
163 Availability
["domain"] = AvailInfo
.Domain
;
164 serializeObject(Availability
, "introducedVersion",
165 serializeSemanticVersion(AvailInfo
.Introduced
));
166 serializeObject(Availability
, "deprecatedVersion",
167 serializeSemanticVersion(AvailInfo
.Deprecated
));
168 serializeObject(Availability
, "obsoletedVersion",
169 serializeSemanticVersion(AvailInfo
.Obsoleted
));
170 AvailabilityArray
.emplace_back(std::move(Availability
));
173 return AvailabilityArray
;
176 /// Get the language name string for interface language references.
177 StringRef
getLanguageName(Language Lang
) {
182 return "objective-c";
184 // Unsupported language currently
186 case Language::ObjCXX
:
187 case Language::OpenCL
:
188 case Language::OpenCLCXX
:
190 case Language::RenderScript
:
194 // Languages that the frontend cannot parse and compile
195 case Language::Unknown
:
197 case Language::LLVM_IR
:
198 llvm_unreachable("Unsupported language kind");
201 llvm_unreachable("Unhandled language kind");
204 /// Serialize the identifier object as specified by the Symbol Graph format.
206 /// The identifier property of a symbol contains the USR for precise and unique
207 /// references, and the interface language name.
208 Object
serializeIdentifier(const APIRecord
&Record
, Language Lang
) {
210 Identifier
["precise"] = Record
.USR
;
211 Identifier
["interfaceLanguage"] = getLanguageName(Lang
);
216 /// Serialize the documentation comments attached to a symbol, as specified by
217 /// the Symbol Graph format.
219 /// The Symbol Graph \c docComment object contains an array of lines. Each line
220 /// represents one line of stripped documentation comment, with source range
224 /// /// This is a documentation comment
225 /// ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' First line.
226 /// /// with multiple lines.
227 /// ^~~~~~~~~~~~~~~~~~~~~~~' Second line.
230 /// \returns \c None if \p Comment is empty, or an \c Object containing the
232 Optional
<Object
> serializeDocComment(const DocComment
&Comment
) {
238 for (const auto &CommentLine
: Comment
) {
240 Line
["text"] = CommentLine
.Text
;
241 serializeObject(Line
, "range",
242 serializeSourceRange(CommentLine
.Begin
, CommentLine
.End
));
243 LinesArray
.emplace_back(std::move(Line
));
245 serializeArray(DocComment
, "lines", LinesArray
);
250 /// Serialize the declaration fragments of a symbol.
252 /// The Symbol Graph declaration fragments is an array of tagged important
253 /// parts of a symbol's declaration. The fragments sequence can be joined to
254 /// form spans of declaration text, with attached information useful for
255 /// purposes like syntax-highlighting etc. For example:
257 /// const int pi; -> "declarationFragments" : [
259 /// "kind" : "keyword",
260 /// "spelling" : "const"
267 /// "kind" : "typeIdentifier",
268 /// "preciseIdentifier" : "c:I",
269 /// "spelling" : "int"
276 /// "kind" : "identifier",
277 /// "spelling" : "pi"
282 /// \returns \c None if \p DF is empty, or an \c Array containing the formatted
283 /// declaration fragments array.
284 Optional
<Array
> serializeDeclarationFragments(const DeclarationFragments
&DF
) {
285 if (DF
.getFragments().empty())
289 for (const auto &F
: DF
.getFragments()) {
291 Fragment
["spelling"] = F
.Spelling
;
292 Fragment
["kind"] = DeclarationFragments::getFragmentKindString(F
.Kind
);
293 if (!F
.PreciseIdentifier
.empty())
294 Fragment
["preciseIdentifier"] = F
.PreciseIdentifier
;
295 Fragments
.emplace_back(std::move(Fragment
));
301 /// Serialize the \c names field of a symbol as specified by the Symbol Graph
304 /// The Symbol Graph names field contains multiple representations of a symbol
305 /// that can be used for different applications:
306 /// - \c title : The simple declared name of the symbol;
307 /// - \c subHeading : An array of declaration fragments that provides tags,
308 /// and potentially more tokens (for example the \c +/- symbol for
309 /// Objective-C methods). Can be used as sub-headings for documentation.
310 Object
serializeNames(const APIRecord
&Record
) {
312 Names
["title"] = Record
.Name
;
313 serializeArray(Names
, "subHeading",
314 serializeDeclarationFragments(Record
.SubHeading
));
315 DeclarationFragments NavigatorFragments
;
316 NavigatorFragments
.append(Record
.Name
,
317 DeclarationFragments::FragmentKind::Identifier
,
318 /*PreciseIdentifier*/ "");
319 serializeArray(Names
, "navigator",
320 serializeDeclarationFragments(NavigatorFragments
));
325 /// Serialize the symbol kind information.
327 /// The Symbol Graph symbol kind property contains a shorthand \c identifier
328 /// which is prefixed by the source language name, useful for tooling to parse
329 /// the kind, and a \c displayName for rendering human-readable names.
330 Object
serializeSymbolKind(const APIRecord
&Record
, Language Lang
) {
331 auto AddLangPrefix
= [&Lang
](StringRef S
) -> std::string
{
332 return (getLanguageName(Lang
) + "." + S
).str();
336 switch (Record
.getKind()) {
337 case APIRecord::RK_GlobalFunction
:
338 Kind
["identifier"] = AddLangPrefix("func");
339 Kind
["displayName"] = "Function";
341 case APIRecord::RK_GlobalVariable
:
342 Kind
["identifier"] = AddLangPrefix("var");
343 Kind
["displayName"] = "Global Variable";
345 case APIRecord::RK_EnumConstant
:
346 Kind
["identifier"] = AddLangPrefix("enum.case");
347 Kind
["displayName"] = "Enumeration Case";
349 case APIRecord::RK_Enum
:
350 Kind
["identifier"] = AddLangPrefix("enum");
351 Kind
["displayName"] = "Enumeration";
353 case APIRecord::RK_StructField
:
354 Kind
["identifier"] = AddLangPrefix("property");
355 Kind
["displayName"] = "Instance Property";
357 case APIRecord::RK_Struct
:
358 Kind
["identifier"] = AddLangPrefix("struct");
359 Kind
["displayName"] = "Structure";
361 case APIRecord::RK_ObjCIvar
:
362 Kind
["identifier"] = AddLangPrefix("ivar");
363 Kind
["displayName"] = "Instance Variable";
365 case APIRecord::RK_ObjCMethod
:
366 if (cast
<ObjCMethodRecord
>(&Record
)->IsInstanceMethod
) {
367 Kind
["identifier"] = AddLangPrefix("method");
368 Kind
["displayName"] = "Instance Method";
370 Kind
["identifier"] = AddLangPrefix("type.method");
371 Kind
["displayName"] = "Type Method";
374 case APIRecord::RK_ObjCProperty
:
375 if (cast
<ObjCPropertyRecord
>(&Record
)->isClassProperty()) {
376 Kind
["identifier"] = AddLangPrefix("type.property");
377 Kind
["displayName"] = "Type Property";
379 Kind
["identifier"] = AddLangPrefix("property");
380 Kind
["displayName"] = "Instance Property";
383 case APIRecord::RK_ObjCInterface
:
384 Kind
["identifier"] = AddLangPrefix("class");
385 Kind
["displayName"] = "Class";
387 case APIRecord::RK_ObjCCategory
:
388 // We don't serialize out standalone Objective-C category symbols yet.
389 llvm_unreachable("Serializing standalone Objective-C category symbols is "
392 case APIRecord::RK_ObjCProtocol
:
393 Kind
["identifier"] = AddLangPrefix("protocol");
394 Kind
["displayName"] = "Protocol";
396 case APIRecord::RK_MacroDefinition
:
397 Kind
["identifier"] = AddLangPrefix("macro");
398 Kind
["displayName"] = "Macro";
400 case APIRecord::RK_Typedef
:
401 Kind
["identifier"] = AddLangPrefix("typealias");
402 Kind
["displayName"] = "Type Alias";
409 template <typename RecordTy
>
410 Optional
<Object
> serializeFunctionSignatureMixinImpl(const RecordTy
&Record
,
412 const auto &FS
= Record
.Signature
;
417 serializeArray(Signature
, "returns",
418 serializeDeclarationFragments(FS
.getReturnType()));
421 for (const auto &P
: FS
.getParameters()) {
423 Parameter
["name"] = P
.Name
;
424 serializeArray(Parameter
, "declarationFragments",
425 serializeDeclarationFragments(P
.Fragments
));
426 Parameters
.emplace_back(std::move(Parameter
));
429 if (!Parameters
.empty())
430 Signature
["parameters"] = std::move(Parameters
);
435 template <typename RecordTy
>
436 Optional
<Object
> serializeFunctionSignatureMixinImpl(const RecordTy
&Record
,
441 /// Serialize the function signature field, as specified by the
442 /// Symbol Graph format.
444 /// The Symbol Graph function signature property contains two arrays.
445 /// - The \c returns array is the declaration fragments of the return type;
446 /// - The \c parameters array contains names and declaration fragments of the
449 /// \returns \c None if \p FS is empty, or an \c Object containing the
450 /// formatted function signature.
451 template <typename RecordTy
>
452 void serializeFunctionSignatureMixin(Object
&Paren
, const RecordTy
&Record
) {
453 serializeObject(Paren
, "functionSignature",
454 serializeFunctionSignatureMixinImpl(
455 Record
, has_function_signature
<RecordTy
>()));
460 void SymbolGraphSerializer::anchor() {}
462 /// Defines the format version emitted by SymbolGraphSerializer.
463 const VersionTuple
SymbolGraphSerializer::FormatVersion
{0, 5, 3};
465 Object
SymbolGraphSerializer::serializeMetadata() const {
467 serializeObject(Metadata
, "formatVersion",
468 serializeSemanticVersion(FormatVersion
));
469 Metadata
["generator"] = clang::getClangFullVersion();
473 Object
SymbolGraphSerializer::serializeModule() const {
475 // The user is expected to always pass `--product-name=` on the command line
476 // to populate this field.
477 Module
["name"] = ProductName
;
478 serializeObject(Module
, "platform", serializePlatform(API
.getTarget()));
482 bool SymbolGraphSerializer::shouldSkip(const APIRecord
&Record
) const {
483 // Skip unconditionally unavailable symbols
484 if (Record
.Availabilities
.isUnconditionallyUnavailable())
487 // Filter out symbols prefixed with an underscore as they are understood to
488 // be symbols clients should not use.
489 if (Record
.Name
.startswith("_"))
495 template <typename RecordTy
>
497 SymbolGraphSerializer::serializeAPIRecord(const RecordTy
&Record
) const {
498 if (shouldSkip(Record
))
502 serializeObject(Obj
, "identifier",
503 serializeIdentifier(Record
, API
.getLanguage()));
504 serializeObject(Obj
, "kind", serializeSymbolKind(Record
, API
.getLanguage()));
505 serializeObject(Obj
, "names", serializeNames(Record
));
508 serializeSourceLocation(Record
.Location
, /*IncludeFileURI=*/true));
509 serializeArray(Obj
, "availability",
510 serializeAvailability(Record
.Availabilities
));
511 serializeObject(Obj
, "docComment", serializeDocComment(Record
.Comment
));
512 serializeArray(Obj
, "declarationFragments",
513 serializeDeclarationFragments(Record
.Declaration
));
514 // TODO: Once we keep track of symbol access information serialize it
516 Obj
["accessLevel"] = "public";
517 serializeArray(Obj
, "pathComponents", Array(PathComponents
));
519 serializeFunctionSignatureMixin(Obj
, Record
);
524 template <typename MemberTy
>
525 void SymbolGraphSerializer::serializeMembers(
526 const APIRecord
&Record
,
527 const SmallVector
<std::unique_ptr
<MemberTy
>> &Members
) {
528 for (const auto &Member
: Members
) {
529 auto MemberPathComponentGuard
= makePathComponentGuard(Member
->Name
);
530 auto MemberRecord
= serializeAPIRecord(*Member
);
534 Symbols
.emplace_back(std::move(*MemberRecord
));
535 serializeRelationship(RelationshipKind::MemberOf
, *Member
, Record
);
539 StringRef
SymbolGraphSerializer::getRelationshipString(RelationshipKind Kind
) {
541 case RelationshipKind::MemberOf
:
543 case RelationshipKind::InheritsFrom
:
544 return "inheritsFrom";
545 case RelationshipKind::ConformsTo
:
548 llvm_unreachable("Unhandled relationship kind");
551 void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind
,
552 SymbolReference Source
,
553 SymbolReference Target
) {
555 Relationship
["source"] = Source
.USR
;
556 Relationship
["target"] = Target
.USR
;
557 Relationship
["kind"] = getRelationshipString(Kind
);
559 Relationships
.emplace_back(std::move(Relationship
));
562 void SymbolGraphSerializer::serializeGlobalFunctionRecord(
563 const GlobalFunctionRecord
&Record
) {
564 auto GlobalPathComponentGuard
= makePathComponentGuard(Record
.Name
);
566 auto Obj
= serializeAPIRecord(Record
);
570 Symbols
.emplace_back(std::move(*Obj
));
573 void SymbolGraphSerializer::serializeGlobalVariableRecord(
574 const GlobalVariableRecord
&Record
) {
575 auto GlobalPathComponentGuard
= makePathComponentGuard(Record
.Name
);
577 auto Obj
= serializeAPIRecord(Record
);
581 Symbols
.emplace_back(std::move(*Obj
));
584 void SymbolGraphSerializer::serializeEnumRecord(const EnumRecord
&Record
) {
585 auto EnumPathComponentGuard
= makePathComponentGuard(Record
.Name
);
586 auto Enum
= serializeAPIRecord(Record
);
590 Symbols
.emplace_back(std::move(*Enum
));
591 serializeMembers(Record
, Record
.Constants
);
594 void SymbolGraphSerializer::serializeStructRecord(const StructRecord
&Record
) {
595 auto StructPathComponentGuard
= makePathComponentGuard(Record
.Name
);
596 auto Struct
= serializeAPIRecord(Record
);
600 Symbols
.emplace_back(std::move(*Struct
));
601 serializeMembers(Record
, Record
.Fields
);
604 void SymbolGraphSerializer::serializeObjCContainerRecord(
605 const ObjCContainerRecord
&Record
) {
606 auto ObjCContainerPathComponentGuard
= makePathComponentGuard(Record
.Name
);
607 auto ObjCContainer
= serializeAPIRecord(Record
);
611 Symbols
.emplace_back(std::move(*ObjCContainer
));
613 serializeMembers(Record
, Record
.Ivars
);
614 serializeMembers(Record
, Record
.Methods
);
615 serializeMembers(Record
, Record
.Properties
);
617 for (const auto &Protocol
: Record
.Protocols
)
618 // Record that Record conforms to Protocol.
619 serializeRelationship(RelationshipKind::ConformsTo
, Record
, Protocol
);
621 if (auto *ObjCInterface
= dyn_cast
<ObjCInterfaceRecord
>(&Record
)) {
622 if (!ObjCInterface
->SuperClass
.empty())
623 // If Record is an Objective-C interface record and it has a super class,
624 // record that Record is inherited from SuperClass.
625 serializeRelationship(RelationshipKind::InheritsFrom
, Record
,
626 ObjCInterface
->SuperClass
);
628 // Members of categories extending an interface are serialized as members of
630 for (const auto *Category
: ObjCInterface
->Categories
) {
631 serializeMembers(Record
, Category
->Ivars
);
632 serializeMembers(Record
, Category
->Methods
);
633 serializeMembers(Record
, Category
->Properties
);
635 // Surface the protocols of the category to the interface.
636 for (const auto &Protocol
: Category
->Protocols
)
637 serializeRelationship(RelationshipKind::ConformsTo
, Record
, Protocol
);
642 void SymbolGraphSerializer::serializeMacroDefinitionRecord(
643 const MacroDefinitionRecord
&Record
) {
644 auto MacroPathComponentGuard
= makePathComponentGuard(Record
.Name
);
645 auto Macro
= serializeAPIRecord(Record
);
650 Symbols
.emplace_back(std::move(*Macro
));
653 void SymbolGraphSerializer::serializeTypedefRecord(
654 const TypedefRecord
&Record
) {
655 // Typedefs of anonymous types have their entries unified with the underlying
657 bool ShouldDrop
= Record
.UnderlyingType
.Name
.empty();
658 // enums declared with `NS_OPTION` have a named enum and a named typedef, with
660 ShouldDrop
|= (Record
.UnderlyingType
.Name
== Record
.Name
);
664 auto TypedefPathComponentGuard
= makePathComponentGuard(Record
.Name
);
665 auto Typedef
= serializeAPIRecord(Record
);
669 (*Typedef
)["type"] = Record
.UnderlyingType
.USR
;
671 Symbols
.emplace_back(std::move(*Typedef
));
674 SymbolGraphSerializer::PathComponentGuard
675 SymbolGraphSerializer::makePathComponentGuard(StringRef Component
) {
676 return PathComponentGuard(PathComponents
, Component
);
679 Object
SymbolGraphSerializer::serialize() {
681 serializeObject(Root
, "metadata", serializeMetadata());
682 serializeObject(Root
, "module", serializeModule());
684 // Serialize global variables in the API set.
685 for (const auto &GlobalVar
: API
.getGlobalVariables())
686 serializeGlobalVariableRecord(*GlobalVar
.second
);
688 for (const auto &GlobalFunction
: API
.getGlobalFunctions())
689 serializeGlobalFunctionRecord(*GlobalFunction
.second
);
691 // Serialize enum records in the API set.
692 for (const auto &Enum
: API
.getEnums())
693 serializeEnumRecord(*Enum
.second
);
695 // Serialize struct records in the API set.
696 for (const auto &Struct
: API
.getStructs())
697 serializeStructRecord(*Struct
.second
);
699 // Serialize Objective-C interface records in the API set.
700 for (const auto &ObjCInterface
: API
.getObjCInterfaces())
701 serializeObjCContainerRecord(*ObjCInterface
.second
);
703 // Serialize Objective-C protocol records in the API set.
704 for (const auto &ObjCProtocol
: API
.getObjCProtocols())
705 serializeObjCContainerRecord(*ObjCProtocol
.second
);
707 for (const auto &Macro
: API
.getMacros())
708 serializeMacroDefinitionRecord(*Macro
.second
);
710 for (const auto &Typedef
: API
.getTypedefs())
711 serializeTypedefRecord(*Typedef
.second
);
713 Root
["symbols"] = std::move(Symbols
);
714 Root
["relationships"] = std::move(Relationships
);
719 void SymbolGraphSerializer::serialize(raw_ostream
&os
) {
720 Object root
= serialize();
722 os
<< formatv("{0}", Value(std::move(root
))) << "\n";
724 os
<< formatv("{0:2}", Value(std::move(root
))) << "\n";