1 //===-- Serialization.cpp - Binary serialization of index data ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "Serialization.h"
12 #include "index/MemIndex.h"
13 #include "index/SymbolLocation.h"
14 #include "index/SymbolOrigin.h"
15 #include "index/dex/Dex.h"
16 #include "support/Logger.h"
17 #include "support/Trace.h"
18 #include "clang/Tooling/CompilationDatabase.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/Compiler.h"
21 #include "llvm/Support/Compression.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/raw_ostream.h"
33 // We use little-endian 32 bit ints, sometimes with variable-length encoding.
35 // Variable-length int encoding (varint) uses the bottom 7 bits of each byte
36 // to encode the number, and the top bit to indicate whether more bytes follow.
37 // e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
38 // This represents 0x1a | 0x2f<<7 = 6042.
39 // A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
41 // Reads binary data from a StringRef, and keeps track of position.
// Read cursor: Begin is the next unread byte, End is one past the last byte.
43 const char *Begin
, *End
;
// Starts reading at the first byte of Data and stops at its end.
47 Reader(llvm::StringRef Data
) : Begin(Data
.begin()), End(Data
.end()) {}
48 // The "error" bit is set by reading past EOF or reading invalid data.
49 // When in an error state, reads may return zero values: callers should check.
// Returns the Err flag (its declaration is not visible in this listing).
50 bool err() const { return Err
; }
51 // Did we read all the data, or encounter an error?
// True once the cursor reaches End or the error flag is set, so a
// `while (!eof())` loop terminates on either condition.
52 bool eof() const { return Begin
== End
|| Err
; }
53 // All the data we didn't read yet.
// Returned as a non-owning view of length End - Begin starting at Begin.
54 llvm::StringRef
rest() const { return llvm::StringRef(Begin
, End
- Begin
); }
57 if (LLVM_UNLIKELY(Begin
== End
)) {
// Reads a 32-bit little-endian value at the cursor (via read32le).
// The guard fires when fewer than 4 bytes remain before End.
// NOTE(review): the guard body and the cursor advance/return statements are
// not visible in this listing.
64 uint32_t consume32() {
65 if (LLVM_UNLIKELY(Begin
+ 4 > End
)) {
69 auto Ret
= llvm::support::endian::read32le(Begin
);
// Returns a view of the next N bytes at the cursor.
// If fewer than N bytes remain, an empty StringRef is returned instead.
// NOTE(review): the statements that set the error flag and advance the cursor
// are not visible in this listing.
74 llvm::StringRef
consume(int N
) {
75 if (LLVM_UNLIKELY(Begin
+ N
> End
)) {
77 return llvm::StringRef();
79 llvm::StringRef
Ret(Begin
, N
);
// Decodes a variable-length integer: the low 7 bits of each byte carry
// payload and bit 7 (More) signals that another byte follows.
// NOTE(review): the early return for single-byte values, the per-iteration
// byte read and the final return are not visible in this listing.
84 uint32_t consumeVar() {
85 constexpr static uint8_t More
= 1 << 7;
87 // Use a 32 bit unsigned here to prevent promotion to signed int (unless int
88 // is wider than 32 bits).
89 uint32_t B
= consume8();
// Common case: the continuation bit is clear on the first byte.
90 if (LLVM_LIKELY(!(B
& More
)))
92 uint32_t Val
= B
& ~More
;
// Each further byte contributes 7 more bits; the loop ends when a byte has
// no continuation bit or the shift reaches 32.
93 for (int Shift
= 7; B
& More
&& Shift
< 32; Shift
+= 7) {
95 // 5th byte of a varint can only have lowest 4 bits set.
96 assert((Shift
!= 28 || B
== (B
& 0x0f)) && "Invalid varint encoding");
97 Val
|= (B
& ~More
) << Shift
;
// Reads a varint and uses it as an index into Strings.
// An index that is out of range yields an empty StringRef; otherwise the
// referenced table entry is returned (a view, not a copy).
// NOTE(review): the statement marking the error state on a bad index is not
// visible in this listing.
102 llvm::StringRef
consumeString(llvm::ArrayRef
<llvm::StringRef
> Strings
) {
103 auto StringIndex
= consumeVar();
104 if (LLVM_UNLIKELY(StringIndex
>= Strings
.size())) {
106 return llvm::StringRef();
108 return Strings
[StringIndex
];
// Reads SymbolID::RawSize raw bytes and converts them to a SymbolID.
// If the reader is in an error state after the read, a default-constructed
// SymbolID is returned rather than calling fromRaw on short data.
111 SymbolID
consumeID() {
112 llvm::StringRef Raw
= consume(SymbolID::RawSize
); // short if truncated.
113 return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw
);
116 // Read a varint (as consumeVar) and resize the container accordingly.
117 // If the size is invalid, return false and mark an error.
118 // (The caller should abort in this case).
// A size is treated as invalid when it exceeds the number of unread bytes,
// which bounds the allocation made by resize() on corrupted input.
119 template <typename T
> [[nodiscard
]] bool consumeSize(T
&Container
) {
120 auto Size
= consumeVar();
121 // Conservatively assume each element is at least one byte.
122 if (Size
> (size_t)(End
- Begin
)) {
126 Container
.resize(Size
);
// Writes I to OS as a little-endian 32-bit value: the bytes are laid out in
// Buf by write32le and then sizeof(Buf) bytes are emitted.
// NOTE(review): the declaration of Buf is not visible in this listing.
131 void write32(uint32_t I
, llvm::raw_ostream
&OS
) {
133 llvm::support::endian::write32le(Buf
, I
);
134 OS
.write(Buf
, sizeof(Buf
));
// Writes I to OS; presumably the varint counterpart of Reader::consumeVar
// (More is the same continuation bit, 1 << 7).
// NOTE(review): only the test for values below 1 << 7 is visible; the bodies
// of both the fast path and the multi-byte path are not shown in this listing.
137 void writeVar(uint32_t I
, llvm::raw_ostream
&OS
) {
138 constexpr static uint8_t More
= 1 << 7;
139 if (LLVM_LIKELY(I
< 1 << 7)) {
153 // STRING TABLE ENCODING
154 // Index data has many string fields, and many strings are identical.
155 // We store each string once, and refer to them by index.
157 // The string table's format is:
158 // - UncompressedSize : uint32 (or 0 for no compression)
159 // - CompressedData : byte[CompressedSize]
161 // CompressedData is a zlib-compressed byte[UncompressedSize].
162 // It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
163 // These are sorted to improve compression.
165 // Maps each string to a canonical representation.
166 // Strings remain owned externally (e.g. by SymbolSlab).
// Usage order implied by the asserts below: intern() every string, call
// finalize() once, then use index() to obtain the ID of an interned string.
167 class StringTableOut
{
// Set of distinct strings seen so far.
168 llvm::DenseSet
<llvm::StringRef
> Unique
;
// Filled by finalize(); a string's position here is its ID.
169 std::vector
<llvm::StringRef
> Sorted
;
170 // Since strings are interned, look up can be by pointer.
171 llvm::DenseMap
<std::pair
<const char *, size_t>, unsigned> Index
;
175 // Ensure there's at least one string in the table.
176 // Table size zero is reserved to indicate no compression.
179 // Add a string to the table. Overwrites S if an identical string exists.
180 void intern(llvm::StringRef
&S
) { S
= *Unique
.insert(S
).first
; };
181 // Finalize the table and write it to OS. No more strings may be added.
182 void finalize(llvm::raw_ostream
&OS
) {
183 Sorted
= {Unique
.begin(), Unique
.end()};
// Record each string's position, keyed by (data pointer, length).
185 for (unsigned I
= 0; I
< Sorted
.size(); ++I
)
186 Index
.try_emplace({Sorted
[I
].data(), Sorted
[I
].size()}, I
);
// Concatenate all strings, each followed by a NUL terminator.
188 std::string RawTable
;
189 for (llvm::StringRef S
: Sorted
) {
190 RawTable
.append(std::string(S
));
191 RawTable
.push_back(0);
// With zlib: emit the uncompressed size followed by the compressed bytes.
193 if (llvm::compression::zlib::isAvailable()) {
194 llvm::SmallVector
<uint8_t, 0> Compressed
;
195 llvm::compression::zlib::compress(llvm::arrayRefFromStringRef(RawTable
),
197 write32(RawTable
.size(), OS
);
198 OS
<< llvm::toStringRef(Compressed
);
// Without zlib: a size field of 0 marks the payload as uncompressed.
200 write32(0, OS
); // No compression.
204 // Get the ID of a string, which must be interned. Table must be finalized.
205 unsigned index(llvm::StringRef S
) const {
206 assert(!Sorted
.empty() && "table not finalized");
207 assert(Index
.count({S
.data(), S
.size()}) && "string not interned");
208 return Index
.find({S
.data(), S
.size()})->second
;
// Result of reading a string table: the decoded strings in table order, plus
// the arena that readStringTable saves them into (via llvm::StringSaver).
212 struct StringTableIn
{
213 llvm::BumpPtrAllocator Arena
;
214 std::vector
<llvm::StringRef
> Strings
;
// Parses a string table section: a 32-bit uncompressed size (0 = stored
// uncompressed) followed by the payload, which is split at NUL bytes into
// individual strings saved in the returned table's arena.
// Returns an error for truncated data, an implausible uncompressed size, a
// compressed table when zlib is unavailable, or a missing NUL terminator.
217 llvm::Expected
<StringTableIn
> readStringTable(llvm::StringRef Data
) {
219 size_t UncompressedSize
= R
.consume32();
221 return error("Truncated string table");
223 llvm::StringRef Uncompressed
;
224 llvm::SmallVector
<uint8_t, 0> UncompressedStorage
;
225 if (UncompressedSize
== 0) // No compression
226 Uncompressed
= R
.rest();
227 else if (llvm::compression::zlib::isAvailable()) {
228 // Don't allocate a massive buffer if UncompressedSize was corrupted
229 // This is effective for sharded index, but not big monolithic ones, as
230 // once compressed size reaches 4MB nothing can be ruled out.
231 // Theoretical max ratio from https://zlib.net/zlib_tech.html
232 constexpr int MaxCompressionRatio
= 1032;
233 if (UncompressedSize
/ MaxCompressionRatio
> R
.rest().size())
234 return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
235 R
.rest().size(), UncompressedSize
);
237 if (llvm::Error E
= llvm::compression::zlib::decompress(
238 llvm::arrayRefFromStringRef(R
.rest()), UncompressedStorage
,
241 Uncompressed
= toStringRef(UncompressedStorage
);
243 return error("Compressed string table, but zlib is unavailable");
246 llvm::StringSaver
Saver(Table
.Arena
);
// NOTE(review): the loop below declares its own Reader named R, which
// shadows the outer R reassigned here; confirm which one later checks use.
247 R
= Reader(Uncompressed
);
248 for (Reader
R(Uncompressed
); !R
.eof();) {
// Each entry runs up to (not including) the next NUL byte.
249 auto Len
= R
.rest().find(0);
250 if (Len
== llvm::StringRef::npos
)
251 return error("Bad string table: not null terminated");
252 Table
.Strings
.push_back(Saver
.save(R
.consume(Len
)));
256 return error("Truncated string table");
257 return std::move(Table
);
261 // Each field of clangd::Symbol is encoded in turn (see implementation).
262 // - StringRef fields encode as varint (index into the string table)
263 // - enums encode as the underlying type
264 // - most numbers encode as varint
// Serializes a SymbolLocation to OS: the string-table index of FileURI as a
// varint, then line and column (each a varint) for Start and then End.
// Loc.FileURI must already be interned in Strings (index() asserts this).
266 void writeLocation(const SymbolLocation
&Loc
, const StringTableOut
&Strings
,
267 llvm::raw_ostream
&OS
) {
268 writeVar(Strings
.index(Loc
.FileURI
), OS
);
269 for (const auto &Endpoint
: {Loc
.Start
, Loc
.End
}) {
270 writeVar(Endpoint
.line(), OS
);
271 writeVar(Endpoint
.column(), OS
);
// Reads a SymbolLocation in the order writeLocation emits it: FileURI as a
// string-table reference, then line and column varints for Start and End.
// FileURI is set to the data pointer of the table entry, so it aliases the
// storage behind Strings rather than owning a copy.
275 SymbolLocation
readLocation(Reader
&Data
,
276 llvm::ArrayRef
<llvm::StringRef
> Strings
) {
278 Loc
.FileURI
= Data
.consumeString(Strings
).data();
279 for (auto *Endpoint
: {&Loc
.Start
, &Loc
.End
}) {
280 Endpoint
->setLine(Data
.consumeVar());
281 Endpoint
->setColumn(Data
.consumeVar());
// Reads one include-graph node: a flags byte, the URI (string-table
// reference), IGN.Digest.size() raw digest bytes, then a count followed by
// that many string-table references for the direct includes.
// NOTE(review): the statements following a failed consumeSize() and the
// final return are not visible in this listing.
286 IncludeGraphNode
readIncludeGraphNode(Reader
&Data
,
287 llvm::ArrayRef
<llvm::StringRef
> Strings
) {
288 IncludeGraphNode IGN
;
289 IGN
.Flags
= static_cast<IncludeGraphNode::SourceFlag
>(Data
.consume8());
290 IGN
.URI
= Data
.consumeString(Strings
);
// Copies whatever bytes consume() returned into the digest.
291 llvm::StringRef Digest
= Data
.consume(IGN
.Digest
.size());
292 std::copy(Digest
.bytes_begin(), Digest
.bytes_end(), IGN
.Digest
.begin());
293 if (!Data
.consumeSize(IGN
.DirectIncludes
))
295 for (llvm::StringRef
&Include
: IGN
.DirectIncludes
)
296 Include
= Data
.consumeString(Strings
);
// Serializes one include-graph node to OS: the flags as one byte, the
// string-table index of the URI, the digest bytes, then the number of direct
// includes followed by the string-table index of each.
// NOTE(review): the rest of the Hash construction and the statement that
// writes it are not visible in this listing.
300 void writeIncludeGraphNode(const IncludeGraphNode
&IGN
,
301 const StringTableOut
&Strings
,
302 llvm::raw_ostream
&OS
) {
303 OS
.write(static_cast<uint8_t>(IGN
.Flags
));
304 writeVar(Strings
.index(IGN
.URI
), OS
);
305 llvm::StringRef
Hash(reinterpret_cast<const char *>(IGN
.Digest
.data()),
308 writeVar(IGN
.DirectIncludes
.size(), OS
);
309 for (llvm::StringRef Include
: IGN
.DirectIncludes
)
310 writeVar(Strings
.index(Include
), OS
);
// Serializes one Symbol to OS, field by field. The field order here defines
// the on-disk layout and must stay in step with readSymbol.
// String fields are written as varint indices into Strings, so every string
// of Sym must already be interned there.
313 void writeSymbol(const Symbol
&Sym
, const StringTableOut
&Strings
,
314 llvm::raw_ostream
&OS
) {
315 OS
<< Sym
.ID
.raw(); // TODO: once we start writing xrefs and posting lists,
316 // symbol IDs should probably be in a string table.
// Kind, Lang and (below) Flags are each written as a single byte.
317 OS
.write(static_cast<uint8_t>(Sym
.SymInfo
.Kind
));
318 OS
.write(static_cast<uint8_t>(Sym
.SymInfo
.Lang
));
319 writeVar(Strings
.index(Sym
.Name
), OS
);
320 writeVar(Strings
.index(Sym
.Scope
), OS
);
321 writeVar(Strings
.index(Sym
.TemplateSpecializationArgs
), OS
);
322 writeLocation(Sym
.Definition
, Strings
, OS
);
323 writeLocation(Sym
.CanonicalDeclaration
, Strings
, OS
);
324 writeVar(Sym
.References
, OS
);
325 OS
.write(static_cast<uint8_t>(Sym
.Flags
));
326 writeVar(Strings
.index(Sym
.Signature
), OS
);
327 writeVar(Strings
.index(Sym
.CompletionSnippetSuffix
), OS
);
328 writeVar(Strings
.index(Sym
.Documentation
), OS
);
329 writeVar(Strings
.index(Sym
.ReturnType
), OS
);
330 writeVar(Strings
.index(Sym
.Type
), OS
);
// Each include header is its string index plus one varint that packs the
// reference count (upper bits) with SupportedDirectives (low two bits).
332 auto WriteInclude
= [&](const Symbol::IncludeHeaderWithReferences
&Include
) {
333 writeVar(Strings
.index(Include
.IncludeHeader
), OS
);
334 writeVar((Include
.References
<< 2) | Include
.SupportedDirectives
, OS
);
336 writeVar(Sym
.IncludeHeaders
.size(), OS
);
337 for (const auto &Include
: Sym
.IncludeHeaders
)
338 WriteInclude(Include
);
// Reads one Symbol from Data, consuming fields in the order writeSymbol
// emits them. String fields are resolved through Strings and therefore
// reference the string table's storage.
// NOTE(review): the declaration of Sym, any use of Origin, the handling of a
// failed consumeSize() and the final return are not visible in this listing.
341 Symbol
readSymbol(Reader
&Data
, llvm::ArrayRef
<llvm::StringRef
> Strings
,
342 SymbolOrigin Origin
) {
344 Sym
.ID
= Data
.consumeID();
345 Sym
.SymInfo
.Kind
= static_cast<index::SymbolKind
>(Data
.consume8());
346 Sym
.SymInfo
.Lang
= static_cast<index::SymbolLanguage
>(Data
.consume8());
347 Sym
.Name
= Data
.consumeString(Strings
);
348 Sym
.Scope
= Data
.consumeString(Strings
);
349 Sym
.TemplateSpecializationArgs
= Data
.consumeString(Strings
);
350 Sym
.Definition
= readLocation(Data
, Strings
);
351 Sym
.CanonicalDeclaration
= readLocation(Data
, Strings
);
352 Sym
.References
= Data
.consumeVar();
353 Sym
.Flags
= static_cast<Symbol::SymbolFlag
>(Data
.consume8());
355 Sym
.Signature
= Data
.consumeString(Strings
);
356 Sym
.CompletionSnippetSuffix
= Data
.consumeString(Strings
);
357 Sym
.Documentation
= Data
.consumeString(Strings
);
358 Sym
.ReturnType
= Data
.consumeString(Strings
);
359 Sym
.Type
= Data
.consumeString(Strings
);
360 if (!Data
.consumeSize(Sym
.IncludeHeaders
))
362 for (auto &I
: Sym
.IncludeHeaders
) {
363 I
.IncludeHeader
= Data
.consumeString(Strings
);
// Unpack the combined varint: low two bits are the supported directives,
// the remaining bits are the reference count.
364 uint32_t RefsWithDirectives
= Data
.consumeVar();
365 I
.References
= RefsWithDirectives
>> 2;
366 I
.SupportedDirectives
= RefsWithDirectives
& 0x3;
372 // A refs section has data grouped by Symbol. Each symbol has:
373 // - SymbolID: 8 bytes
376 // Fields of Ref are encoded in turn, see implementation.
// Serializes the references of one symbol to OS: the number of refs as a
// varint, then for each ref its kind (one byte), its location and the raw
// ID of its container.
// NOTE(review): the statement that writes ID itself is not visible in this
// listing.
378 void writeRefs(const SymbolID
&ID
, llvm::ArrayRef
<Ref
> Refs
,
379 const StringTableOut
&Strings
, llvm::raw_ostream
&OS
) {
381 writeVar(Refs
.size(), OS
);
382 for (const auto &Ref
: Refs
) {
383 OS
.write(static_cast<unsigned char>(Ref
.Kind
));
384 writeLocation(Ref
.Location
, Strings
, OS
);
385 OS
<< Ref
.Container
.raw();
// Reads the references of one symbol: the symbol's ID, a count, then for
// each ref its kind byte, location and container ID.
// Returns the (ID, refs) pair.
// NOTE(review): the handling of a failed consumeSize() and the final return
// are not visible in this listing.
389 std::pair
<SymbolID
, std::vector
<Ref
>>
390 readRefs(Reader
&Data
, llvm::ArrayRef
<llvm::StringRef
> Strings
) {
391 std::pair
<SymbolID
, std::vector
<Ref
>> Result
;
392 Result
.first
= Data
.consumeID();
393 if (!Data
.consumeSize(Result
.second
))
395 for (auto &Ref
: Result
.second
) {
396 Ref
.Kind
= static_cast<RefKind
>(Data
.consume8());
397 Ref
.Location
= readLocation(Data
, Strings
);
398 Ref
.Container
= Data
.consumeID();
403 // RELATIONS ENCODING
404 // A relations section is a flat list of relations. Each relation has:
405 // - SymbolID (subject): 8 bytes
406 // - relation kind (predicate): 1 byte
407 // - SymbolID (object): 8 bytes
408 // In the future, we might prefer a packed representation if the need arises.
// Serializes one relation to OS as: raw subject ID, predicate as a single
// byte, raw object ID. No string table is involved.
410 void writeRelation(const Relation
&R
, llvm::raw_ostream
&OS
) {
411 OS
<< R
.Subject
.raw();
412 OS
.write(static_cast<uint8_t>(R
.Predicate
));
413 OS
<< R
.Object
.raw();
// Reads one relation in the order writeRelation emits it: subject ID,
// predicate byte, object ID. Read failures are reported through Data's error
// state, not through the return value.
416 Relation
readRelation(Reader
&Data
) {
417 SymbolID Subject
= Data
.consumeID();
418 RelationKind Predicate
= static_cast<RelationKind
>(Data
.consume8());
419 SymbolID Object
= Data
.consumeID();
420 return {Subject
, Predicate
, Object
};
// A compile command whose directory and arguments are StringRefs, i.e.
// non-owning views; in this file they refer to string-table entries.
423 struct InternedCompileCommand
{
424 llvm::StringRef Directory
;
425 std::vector
<llvm::StringRef
> CommandLine
;
// Serializes a compile command to CmdOS: the string-table index of the
// directory, the argument count, then the string-table index of each
// argument, all as varints. Every string must already be interned in Strings.
428 void writeCompileCommand(const InternedCompileCommand
&Cmd
,
429 const StringTableOut
&Strings
,
430 llvm::raw_ostream
&CmdOS
) {
431 writeVar(Strings
.index(Cmd
.Directory
), CmdOS
);
432 writeVar(Cmd
.CommandLine
.size(), CmdOS
);
433 for (llvm::StringRef C
: Cmd
.CommandLine
)
434 writeVar(Strings
.index(C
), CmdOS
);
// Reads a compile command in the order writeCompileCommand emits it:
// directory, argument count, then each argument, all resolved via Strings.
// NOTE(review): CmdReader is taken by value, so any error state set while
// reading here is not reflected in the caller's Reader object. Confirm this
// is intended before relying on the caller's err() after this call.
// NOTE(review): the handling of a failed consumeSize() and the final return
// are not visible in this listing.
437 InternedCompileCommand
438 readCompileCommand(Reader CmdReader
, llvm::ArrayRef
<llvm::StringRef
> Strings
) {
439 InternedCompileCommand Cmd
;
440 Cmd
.Directory
= CmdReader
.consumeString(Strings
);
441 if (!CmdReader
.consumeSize(Cmd
.CommandLine
))
443 for (llvm::StringRef
&C
: Cmd
.CommandLine
)
444 C
= CmdReader
.consumeString(Strings
);
449 // A file is a RIFF chunk with type 'CdIx'.
450 // It contains the sections:
451 // - meta: version number
452 // - srcs: information related to include graph
453 // - stri: string table
455 // - refs: references to symbols
457 // The current versioning scheme is simple - non-current versions are rejected.
458 // If you make a breaking change, bump this version number to invalidate stored
459 // data. Later we may want to support some backward compatibility.
// Written into the "meta" chunk by writeRIFF and compared for exact equality
// by readRIFF.
460 constexpr static uint32_t Version
= 19;
// Parses a RIFF-encoded index file.
// Steps visible here: parse the RIFF container and check its type is "CdIx",
// collect chunks by ID, validate the "meta" version, read the "stri" string
// table, then decode each optional section ("srcs", "symb", "refs", "rela",
// "cmdl") that is present.
// Returns an error for a wrong file type, a missing meta/stri chunk, a
// version mismatch, or a malformed/truncated section.
462 llvm::Expected
<IndexFileIn
> readRIFF(llvm::StringRef Data
,
463 SymbolOrigin Origin
) {
464 auto RIFF
= riff::readFile(Data
);
466 return RIFF
.takeError();
467 if (RIFF
->Type
!= riff::fourCC("CdIx"))
468 return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF
->Type
));
// try_emplace keeps the first chunk when an ID occurs more than once.
469 llvm::StringMap
<llvm::StringRef
> Chunks
;
470 for (const auto &Chunk
: RIFF
->Chunks
)
471 Chunks
.try_emplace(llvm::StringRef(Chunk
.ID
.data(), Chunk
.ID
.size()),
474 if (!Chunks
.count("meta"))
475 return error("missing meta chunk");
476 Reader
Meta(Chunks
.lookup("meta"));
477 auto SeenVersion
= Meta
.consume32();
478 if (SeenVersion
!= Version
)
479 return error("wrong version: want {0}, got {1}", Version
, SeenVersion
);
481 // meta chunk is checked above, as we prefer the "version mismatch" error.
482 for (llvm::StringRef RequiredChunk
: {"stri"})
483 if (!Chunks
.count(RequiredChunk
))
484 return error("missing required chunk {0}", RequiredChunk
);
486 auto Strings
= readStringTable(Chunks
.lookup("stri"));
488 return Strings
.takeError();
// Include graph: one node per entry, keyed by URI.
491 if (Chunks
.count("srcs")) {
492 Reader
SrcsReader(Chunks
.lookup("srcs"));
493 Result
.Sources
.emplace();
494 while (!SrcsReader
.eof()) {
495 auto IGN
= readIncludeGraphNode(SrcsReader
, Strings
->Strings
);
496 auto Entry
= Result
.Sources
->try_emplace(IGN
.URI
).first
;
497 Entry
->getValue() = std::move(IGN
);
498 // We change all the strings inside the structure to point at the keys in
499 // the map, since it is the only copy of the string that's going to live.
500 Entry
->getValue().URI
= Entry
->getKey();
501 for (auto &Include
: Entry
->getValue().DirectIncludes
)
502 Include
= Result
.Sources
->try_emplace(Include
).first
->getKey();
504 if (SrcsReader
.err())
505 return error("malformed or truncated include uri");
// Symbols: read until the section is exhausted or an error occurs.
508 if (Chunks
.count("symb")) {
509 Reader
SymbolReader(Chunks
.lookup("symb"));
510 SymbolSlab::Builder Symbols
;
511 while (!SymbolReader
.eof())
512 Symbols
.insert(readSymbol(SymbolReader
, Strings
->Strings
, Origin
));
513 if (SymbolReader
.err())
514 return error("malformed or truncated symbol");
515 Result
.Symbols
= std::move(Symbols
).build();
// Refs: each bundle holds all references to one symbol.
517 if (Chunks
.count("refs")) {
518 Reader
RefsReader(Chunks
.lookup("refs"));
519 RefSlab::Builder Refs
;
520 while (!RefsReader
.eof()) {
521 auto RefsBundle
= readRefs(RefsReader
, Strings
->Strings
);
522 for (const auto &Ref
: RefsBundle
.second
) // FIXME: bulk insert?
523 Refs
.insert(RefsBundle
.first
, Ref
);
525 if (RefsReader
.err())
526 return error("malformed or truncated refs");
527 Result
.Refs
= std::move(Refs
).build();
// Relations: a flat list of (subject, predicate, object) records.
529 if (Chunks
.count("rela")) {
530 Reader
RelationsReader(Chunks
.lookup("rela"));
531 RelationSlab::Builder Relations
;
532 while (!RelationsReader
.eof())
533 Relations
.insert(readRelation(RelationsReader
));
534 if (RelationsReader
.err())
535 return error("malformed or truncated relations");
536 Result
.Relations
= std::move(Relations
).build();
// Compile command: copied into owning std::string storage in Result.Cmd.
538 if (Chunks
.count("cmdl")) {
539 Reader
CmdReader(Chunks
.lookup("cmdl"));
540 InternedCompileCommand Cmd
=
541 readCompileCommand(CmdReader
, Strings
->Strings
);
543 return error("malformed or truncated commandline section");
544 Result
.Cmd
.emplace();
545 Result
.Cmd
->Directory
= std::string(Cmd
.Directory
);
546 Result
.Cmd
->CommandLine
.reserve(Cmd
.CommandLine
.size());
547 for (llvm::StringRef C
: Cmd
.CommandLine
)
548 Result
.Cmd
->CommandLine
.emplace_back(C
);
550 return std::move(Result
);
// Visits string fields of an IncludeGraphNode; the visible part iterates
// over DirectIncludes by mutable reference. writeRIFF passes a callback that
// interns each string in place.
// NOTE(review): the statements that invoke CB are not visible in this
// listing.
553 template <class Callback
>
554 void visitStrings(IncludeGraphNode
&IGN
, const Callback
&CB
) {
556 for (llvm::StringRef
&Include
: IGN
.DirectIncludes
)
// Writes Data as a RIFF file of type "CdIx".
// Two phases are visible: first every section's data is copied locally and
// its strings are interned into one StringTableOut; then, once the table is
// finalized, each section is serialized into its own chunk ("meta", "stri",
// "symb", "refs", "rela", "srcs", "cmdl").
// Data.Symbols must be set (asserted below).
// NOTE(review): the conditions guarding the optional sections and the final
// write of RIFF to OS are not visible in this listing.
560 void writeRIFF(const IndexFileOut
&Data
, llvm::raw_ostream
&OS
) {
561 assert(Data
.Symbols
&& "An index file without symbols makes no sense!");
563 RIFF
.Type
= riff::fourCC("CdIx");
565 llvm::SmallString
<4> Meta
;
567 llvm::raw_svector_ostream
MetaOS(Meta
);
568 write32(Version
, MetaOS
);
570 RIFF
.Chunks
.push_back({riff::fourCC("meta"), Meta
});
// Copy each symbol so its string fields can be rewritten to the interned
// versions without modifying the input.
572 StringTableOut Strings
;
573 std::vector
<Symbol
> Symbols
;
574 for (const auto &Sym
: *Data
.Symbols
) {
575 Symbols
.emplace_back(Sym
);
576 visitStrings(Symbols
.back(),
577 [&](llvm::StringRef
&S
) { Strings
.intern(S
); });
579 std::vector
<IncludeGraphNode
> Sources
;
581 for (const auto &Source
: *Data
.Sources
) {
582 Sources
.push_back(Source
.getValue());
583 visitStrings(Sources
.back(),
584 [&](llvm::StringRef
&S
) { Strings
.intern(S
); });
587 std::vector
<std::pair
<SymbolID
, std::vector
<Ref
>>> Refs
;
589 for (const auto &Sym
: *Data
.Refs
) {
590 Refs
.emplace_back(Sym
);
591 for (auto &Ref
: Refs
.back().second
) {
// FileURI is a raw pointer: intern via a temporary StringRef, then store
// the interned string's data pointer back.
592 llvm::StringRef File
= Ref
.Location
.FileURI
;
593 Strings
.intern(File
);
594 Ref
.Location
.FileURI
= File
.data();
599 std::vector
<Relation
> Relations
;
600 if (Data
.Relations
) {
601 for (const auto &Relation
: *Data
.Relations
) {
602 Relations
.emplace_back(Relation
);
603 // No strings to be interned in relations.
607 InternedCompileCommand InternedCmd
;
609 InternedCmd
.CommandLine
.reserve(Data
.Cmd
->CommandLine
.size());
610 InternedCmd
.Directory
= Data
.Cmd
->Directory
;
611 Strings
.intern(InternedCmd
.Directory
);
612 for (llvm::StringRef C
: Data
.Cmd
->CommandLine
) {
613 InternedCmd
.CommandLine
.emplace_back(C
);
614 Strings
.intern(InternedCmd
.CommandLine
.back());
// All strings are interned by now; finalizing fixes the indices that the
// section writers below look up.
618 std::string StringSection
;
620 llvm::raw_string_ostream
StringOS(StringSection
);
621 Strings
.finalize(StringOS
);
623 RIFF
.Chunks
.push_back({riff::fourCC("stri"), StringSection
});
625 std::string SymbolSection
;
627 llvm::raw_string_ostream
SymbolOS(SymbolSection
);
628 for (const auto &Sym
: Symbols
)
629 writeSymbol(Sym
, Strings
, SymbolOS
);
631 RIFF
.Chunks
.push_back({riff::fourCC("symb"), SymbolSection
});
633 std::string RefsSection
;
636 llvm::raw_string_ostream
RefsOS(RefsSection
);
637 for (const auto &Sym
: Refs
)
638 writeRefs(Sym
.first
, Sym
.second
, Strings
, RefsOS
);
640 RIFF
.Chunks
.push_back({riff::fourCC("refs"), RefsSection
});
643 std::string RelationSection
;
644 if (Data
.Relations
) {
646 llvm::raw_string_ostream RelationOS
{RelationSection
};
647 for (const auto &Relation
: Relations
)
648 writeRelation(Relation
, RelationOS
);
650 RIFF
.Chunks
.push_back({riff::fourCC("rela"), RelationSection
});
653 std::string SrcsSection
;
656 llvm::raw_string_ostream
SrcsOS(SrcsSection
);
657 for (const auto &SF
: Sources
)
658 writeIncludeGraphNode(SF
, Strings
, SrcsOS
);
660 RIFF
.Chunks
.push_back({riff::fourCC("srcs"), SrcsSection
});
663 std::string CmdlSection
;
666 llvm::raw_string_ostream
CmdOS(CmdlSection
);
667 writeCompileCommand(InternedCmd
, Strings
, CmdOS
);
669 RIFF
.Chunks
.push_back({riff::fourCC("cmdl"), CmdlSection
});
677 // Defined in YAMLSerialization.cpp.
// Forward declarations of the YAML writer and reader; readIndexFile below
// calls readYAML for input that does not start with "RIFF".
678 void writeYAML(const IndexFileOut
&, llvm::raw_ostream
&);
679 llvm::Expected
<IndexFileIn
> readYAML(llvm::StringRef
, SymbolOrigin Origin
);
// Stream-insertion operator for an IndexFileOut, with one case per
// IndexFileFormat (RIFF and YAML).
// NOTE(review): the switch expression and the body of each case are not
// visible in this listing; presumably they call writeRIFF / writeYAML.
681 llvm::raw_ostream
&operator<<(llvm::raw_ostream
&OS
, const IndexFileOut
&O
) {
683 case IndexFileFormat::RIFF
:
686 case IndexFileFormat::YAML
:
// Reads an index file, choosing the format from its content: data that
// starts with "RIFF" is parsed by readRIFF, anything else is tried as YAML.
// When YAML parsing fails, the returned error wraps the YAML parser's error.
693 llvm::Expected
<IndexFileIn
> readIndexFile(llvm::StringRef Data
,
694 SymbolOrigin Origin
) {
695 if (Data
.starts_with("RIFF")) {
696 return readRIFF(Data
, Origin
);
698 if (auto YAMLContents
= readYAML(Data
, Origin
)) {
699 return std::move(*YAMLContents
);
701 return error("Not a RIFF file and failed to parse as YAML: {0}",
702 YAMLContents
.takeError());
// Loads the index file at SymbolFilename and builds an in-memory index from
// its symbols, refs and relations: a dex::Dex when UseDex is true, otherwise
// a MemIndex. Failures to open or parse the file are logged with elog.
// NOTE(review): the return statements, the declarations of Symbols and Refs
// and the checks guarding the `*I->...` dereferences are not visible in this
// listing.
706 std::unique_ptr
<SymbolIndex
> loadIndex(llvm::StringRef SymbolFilename
,
707 SymbolOrigin Origin
, bool UseDex
) {
708 trace::Span
OverallTracer("LoadIndex");
709 auto Buffer
= llvm::MemoryBuffer::getFile(SymbolFilename
);
711 elog("Can't open {0}: {1}", SymbolFilename
, Buffer
.getError().message());
717 RelationSlab Relations
;
719 trace::Span
Tracer("ParseIndex");
720 if (auto I
= readIndexFile(Buffer
->get()->getBuffer(), Origin
)) {
722 Symbols
= std::move(*I
->Symbols
);
724 Refs
= std::move(*I
->Refs
);
726 Relations
= std::move(*I
->Relations
);
728 elog("Bad index file: {0}", I
.takeError());
// Capture the counts before the slabs are moved into the index below.
733 size_t NumSym
= Symbols
.size();
734 size_t NumRefs
= Refs
.numRefs();
735 size_t NumRelations
= Relations
.size();
737 trace::Span
Tracer("BuildIndex");
738 auto Index
= UseDex
? dex::Dex::build(std::move(Symbols
), std::move(Refs
),
739 std::move(Relations
))
740 : MemIndex::build(std::move(Symbols
), std::move(Refs
),
741 std::move(Relations
));
742 vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
743 " - number of symbols: {3}\n"
744 " - number of refs: {4}\n"
745 " - number of relations: {5}",
746 UseDex
? "Dex" : "MemIndex", SymbolFilename
,
747 Index
->estimateMemoryUsage(), NumSym
, NumRefs
, NumRelations
);
751 } // namespace clangd