1 //===-- Serialization.cpp - Binary serialization of index data ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "Serialization.h"
12 #include "index/MemIndex.h"
13 #include "index/SymbolLocation.h"
14 #include "index/SymbolOrigin.h"
15 #include "index/dex/Dex.h"
16 #include "support/Logger.h"
17 #include "support/Trace.h"
18 #include "clang/Tooling/CompilationDatabase.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/Compiler.h"
21 #include "llvm/Support/Compression.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/raw_ostream.h"
33 // We use little-endian 32 bit ints, sometimes with variable-length encoding.
35 // Variable-length int encoding (varint) uses the bottom 7 bits of each byte
36 // to encode the number, and the top bit to indicate whether more bytes follow.
37 // e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
38 // This represents 0x1a | 0x2f<<7 = 6042.
39 // A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
41 // Reads binary data from a StringRef, and keeps track of position.
43 const char *Begin
, *End
;
47 Reader(llvm::StringRef Data
) : Begin(Data
.begin()), End(Data
.end()) {}
48 // The "error" bit is set by reading past EOF or reading invalid data.
49 // When in an error state, reads may return zero values: callers should check.
50 bool err() const { return Err
; }
51 // Did we read all the data, or encounter an error?
52 bool eof() const { return Begin
== End
|| Err
; }
53 // All the data we didn't read yet.
54 llvm::StringRef
rest() const { return llvm::StringRef(Begin
, End
- Begin
); }
57 if (LLVM_UNLIKELY(Begin
== End
)) {
64 uint32_t consume32() {
65 if (LLVM_UNLIKELY(Begin
+ 4 > End
)) {
69 auto Ret
= llvm::support::endian::read32le(Begin
);
74 llvm::StringRef
consume(int N
) {
75 if (LLVM_UNLIKELY(Begin
+ N
> End
)) {
77 return llvm::StringRef();
79 llvm::StringRef
Ret(Begin
, N
);
84 uint32_t consumeVar() {
85 constexpr static uint8_t More
= 1 << 7;
87 // Use a 32 bit unsigned here to prevent promotion to signed int (unless int
88 // is wider than 32 bits).
89 uint32_t B
= consume8();
90 if (LLVM_LIKELY(!(B
& More
)))
92 uint32_t Val
= B
& ~More
;
93 for (int Shift
= 7; B
& More
&& Shift
< 32; Shift
+= 7) {
95 // 5th byte of a varint can only have lowest 4 bits set.
96 assert((Shift
!= 28 || B
== (B
& 0x0f)) && "Invalid varint encoding");
97 Val
|= (B
& ~More
) << Shift
;
102 llvm::StringRef
consumeString(llvm::ArrayRef
<llvm::StringRef
> Strings
) {
103 auto StringIndex
= consumeVar();
104 if (LLVM_UNLIKELY(StringIndex
>= Strings
.size())) {
106 return llvm::StringRef();
108 return Strings
[StringIndex
];
111 SymbolID
consumeID() {
112 llvm::StringRef Raw
= consume(SymbolID::RawSize
); // short if truncated.
113 return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw
);
116 // Read a varint (as consumeVar) and resize the container accordingly.
117 // If the size is invalid, return false and mark an error.
118 // (The caller should abort in this case).
119 template <typename T
> [[nodiscard
]] bool consumeSize(T
&Container
) {
120 auto Size
= consumeVar();
121 // Conservatively assume each element is at least one byte.
122 if (Size
> (size_t)(End
- Begin
)) {
126 Container
.resize(Size
);
131 void write32(uint32_t I
, llvm::raw_ostream
&OS
) {
133 llvm::support::endian::write32le(Buf
, I
);
134 OS
.write(Buf
, sizeof(Buf
));
137 void writeVar(uint32_t I
, llvm::raw_ostream
&OS
) {
138 constexpr static uint8_t More
= 1 << 7;
139 if (LLVM_LIKELY(I
< 1 << 7)) {
153 // STRING TABLE ENCODING
154 // Index data has many string fields, and many strings are identical.
155 // We store each string once, and refer to them by index.
157 // The string table's format is:
158 // - UncompressedSize : uint32 (or 0 for no compression)
159 // - CompressedData : byte[CompressedSize]
161 // CompressedData is a zlib-compressed byte[UncompressedSize].
162 // It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
163 // These are sorted to improve compression.
165 // Maps each string to a canonical representation.
166 // Strings remain owned externally (e.g. by SymbolSlab).
167 class StringTableOut
{
168 llvm::DenseSet
<llvm::StringRef
> Unique
;
169 std::vector
<llvm::StringRef
> Sorted
;
170 // Since strings are interned, look up can be by pointer.
171 llvm::DenseMap
<std::pair
<const char *, size_t>, unsigned> Index
;
175 // Ensure there's at least one string in the table.
176 // Table size zero is reserved to indicate no compression.
179 // Add a string to the table. Overwrites S if an identical string exists.
180 void intern(llvm::StringRef
&S
) { S
= *Unique
.insert(S
).first
; };
181 // Finalize the table and write it to OS. No more strings may be added.
182 void finalize(llvm::raw_ostream
&OS
) {
183 Sorted
= {Unique
.begin(), Unique
.end()};
185 for (unsigned I
= 0; I
< Sorted
.size(); ++I
)
186 Index
.try_emplace({Sorted
[I
].data(), Sorted
[I
].size()}, I
);
188 std::string RawTable
;
189 for (llvm::StringRef S
: Sorted
) {
190 RawTable
.append(std::string(S
));
191 RawTable
.push_back(0);
193 if (llvm::compression::zlib::isAvailable()) {
194 llvm::SmallVector
<uint8_t, 0> Compressed
;
195 llvm::compression::zlib::compress(llvm::arrayRefFromStringRef(RawTable
),
197 write32(RawTable
.size(), OS
);
198 OS
<< llvm::toStringRef(Compressed
);
200 write32(0, OS
); // No compression.
204 // Get the ID of a string, which must be interned. Table must be finalized.
205 unsigned index(llvm::StringRef S
) const {
206 assert(!Sorted
.empty() && "table not finalized");
207 assert(Index
.count({S
.data(), S
.size()}) && "string not interned");
208 return Index
.find({S
.data(), S
.size()})->second
;
212 struct StringTableIn
{
213 llvm::BumpPtrAllocator Arena
;
214 std::vector
<llvm::StringRef
> Strings
;
217 llvm::Expected
<StringTableIn
> readStringTable(llvm::StringRef Data
) {
219 size_t UncompressedSize
= R
.consume32();
221 return error("Truncated string table");
223 llvm::StringRef Uncompressed
;
224 llvm::SmallVector
<uint8_t, 0> UncompressedStorage
;
225 if (UncompressedSize
== 0) // No compression
226 Uncompressed
= R
.rest();
227 else if (llvm::compression::zlib::isAvailable()) {
228 // Don't allocate a massive buffer if UncompressedSize was corrupted.
229 // This is effective for sharded index, but not big monolithic ones, as
230 // once compressed size reaches 4MB nothing can be ruled out.
231 // Theoretical max ratio from https://zlib.net/zlib_tech.html
232 constexpr int MaxCompressionRatio
= 1032;
233 if (UncompressedSize
/ MaxCompressionRatio
> R
.rest().size())
234 return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
235 R
.rest().size(), UncompressedSize
);
237 if (llvm::Error E
= llvm::compression::zlib::uncompress(
238 llvm::arrayRefFromStringRef(R
.rest()), UncompressedStorage
,
241 Uncompressed
= toStringRef(UncompressedStorage
);
243 return error("Compressed string table, but zlib is unavailable");
246 llvm::StringSaver
Saver(Table
.Arena
);
247 R
= Reader(Uncompressed
);
248 for (Reader
R(Uncompressed
); !R
.eof();) {
249 auto Len
= R
.rest().find(0);
250 if (Len
== llvm::StringRef::npos
)
251 return error("Bad string table: not null terminated");
252 Table
.Strings
.push_back(Saver
.save(R
.consume(Len
)));
256 return error("Truncated string table");
257 return std::move(Table
);
261 // Each field of clangd::Symbol is encoded in turn (see implementation).
262 // - StringRef fields encode as varint (index into the string table)
263 // - enums encode as the underlying type
264 // - most numbers encode as varint
266 void writeLocation(const SymbolLocation
&Loc
, const StringTableOut
&Strings
,
267 llvm::raw_ostream
&OS
) {
268 writeVar(Strings
.index(Loc
.FileURI
), OS
);
269 for (const auto &Endpoint
: {Loc
.Start
, Loc
.End
}) {
270 writeVar(Endpoint
.line(), OS
);
271 writeVar(Endpoint
.column(), OS
);
275 SymbolLocation
readLocation(Reader
&Data
,
276 llvm::ArrayRef
<llvm::StringRef
> Strings
) {
278 Loc
.FileURI
= Data
.consumeString(Strings
).data();
279 for (auto *Endpoint
: {&Loc
.Start
, &Loc
.End
}) {
280 Endpoint
->setLine(Data
.consumeVar());
281 Endpoint
->setColumn(Data
.consumeVar());
286 IncludeGraphNode
readIncludeGraphNode(Reader
&Data
,
287 llvm::ArrayRef
<llvm::StringRef
> Strings
) {
288 IncludeGraphNode IGN
;
289 IGN
.Flags
= static_cast<IncludeGraphNode::SourceFlag
>(Data
.consume8());
290 IGN
.URI
= Data
.consumeString(Strings
);
291 llvm::StringRef Digest
= Data
.consume(IGN
.Digest
.size());
292 std::copy(Digest
.bytes_begin(), Digest
.bytes_end(), IGN
.Digest
.begin());
293 if (!Data
.consumeSize(IGN
.DirectIncludes
))
295 for (llvm::StringRef
&Include
: IGN
.DirectIncludes
)
296 Include
= Data
.consumeString(Strings
);
300 void writeIncludeGraphNode(const IncludeGraphNode
&IGN
,
301 const StringTableOut
&Strings
,
302 llvm::raw_ostream
&OS
) {
303 OS
.write(static_cast<uint8_t>(IGN
.Flags
));
304 writeVar(Strings
.index(IGN
.URI
), OS
);
305 llvm::StringRef
Hash(reinterpret_cast<const char *>(IGN
.Digest
.data()),
308 writeVar(IGN
.DirectIncludes
.size(), OS
);
309 for (llvm::StringRef Include
: IGN
.DirectIncludes
)
310 writeVar(Strings
.index(Include
), OS
);
313 void writeSymbol(const Symbol
&Sym
, const StringTableOut
&Strings
,
314 llvm::raw_ostream
&OS
) {
315 OS
<< Sym
.ID
.raw(); // TODO: once we start writing xrefs and posting lists,
316 // symbol IDs should probably be in a string table.
317 OS
.write(static_cast<uint8_t>(Sym
.SymInfo
.Kind
));
318 OS
.write(static_cast<uint8_t>(Sym
.SymInfo
.Lang
));
319 writeVar(Strings
.index(Sym
.Name
), OS
);
320 writeVar(Strings
.index(Sym
.Scope
), OS
);
321 writeVar(Strings
.index(Sym
.TemplateSpecializationArgs
), OS
);
322 writeLocation(Sym
.Definition
, Strings
, OS
);
323 writeLocation(Sym
.CanonicalDeclaration
, Strings
, OS
);
324 writeVar(Sym
.References
, OS
);
325 OS
.write(static_cast<uint8_t>(Sym
.Flags
));
326 writeVar(Strings
.index(Sym
.Signature
), OS
);
327 writeVar(Strings
.index(Sym
.CompletionSnippetSuffix
), OS
);
328 writeVar(Strings
.index(Sym
.Documentation
), OS
);
329 writeVar(Strings
.index(Sym
.ReturnType
), OS
);
330 writeVar(Strings
.index(Sym
.Type
), OS
);
332 auto WriteInclude
= [&](const Symbol::IncludeHeaderWithReferences
&Include
) {
333 writeVar(Strings
.index(Include
.IncludeHeader
), OS
);
334 writeVar(Include
.References
, OS
);
336 writeVar(Sym
.IncludeHeaders
.size(), OS
);
337 for (const auto &Include
: Sym
.IncludeHeaders
)
338 WriteInclude(Include
);
341 Symbol
readSymbol(Reader
&Data
, llvm::ArrayRef
<llvm::StringRef
> Strings
,
342 SymbolOrigin Origin
) {
344 Sym
.ID
= Data
.consumeID();
345 Sym
.SymInfo
.Kind
= static_cast<index::SymbolKind
>(Data
.consume8());
346 Sym
.SymInfo
.Lang
= static_cast<index::SymbolLanguage
>(Data
.consume8());
347 Sym
.Name
= Data
.consumeString(Strings
);
348 Sym
.Scope
= Data
.consumeString(Strings
);
349 Sym
.TemplateSpecializationArgs
= Data
.consumeString(Strings
);
350 Sym
.Definition
= readLocation(Data
, Strings
);
351 Sym
.CanonicalDeclaration
= readLocation(Data
, Strings
);
352 Sym
.References
= Data
.consumeVar();
353 Sym
.Flags
= static_cast<Symbol::SymbolFlag
>(Data
.consume8());
355 Sym
.Signature
= Data
.consumeString(Strings
);
356 Sym
.CompletionSnippetSuffix
= Data
.consumeString(Strings
);
357 Sym
.Documentation
= Data
.consumeString(Strings
);
358 Sym
.ReturnType
= Data
.consumeString(Strings
);
359 Sym
.Type
= Data
.consumeString(Strings
);
360 if (!Data
.consumeSize(Sym
.IncludeHeaders
))
362 for (auto &I
: Sym
.IncludeHeaders
) {
363 I
.IncludeHeader
= Data
.consumeString(Strings
);
364 I
.References
= Data
.consumeVar();
370 // A refs section has data grouped by Symbol. Each symbol has:
371 // - SymbolID: 8 bytes
374 // Fields of Ref are encoded in turn, see implementation.
376 void writeRefs(const SymbolID
&ID
, llvm::ArrayRef
<Ref
> Refs
,
377 const StringTableOut
&Strings
, llvm::raw_ostream
&OS
) {
379 writeVar(Refs
.size(), OS
);
380 for (const auto &Ref
: Refs
) {
381 OS
.write(static_cast<unsigned char>(Ref
.Kind
));
382 writeLocation(Ref
.Location
, Strings
, OS
);
383 OS
<< Ref
.Container
.raw();
387 std::pair
<SymbolID
, std::vector
<Ref
>>
388 readRefs(Reader
&Data
, llvm::ArrayRef
<llvm::StringRef
> Strings
) {
389 std::pair
<SymbolID
, std::vector
<Ref
>> Result
;
390 Result
.first
= Data
.consumeID();
391 if (!Data
.consumeSize(Result
.second
))
393 for (auto &Ref
: Result
.second
) {
394 Ref
.Kind
= static_cast<RefKind
>(Data
.consume8());
395 Ref
.Location
= readLocation(Data
, Strings
);
396 Ref
.Container
= Data
.consumeID();
401 // RELATIONS ENCODING
402 // A relations section is a flat list of relations. Each relation has:
403 // - SymbolID (subject): 8 bytes
404 // - relation kind (predicate): 1 byte
405 // - SymbolID (object): 8 bytes
406 // In the future, we might prefer a packed representation if the need arises.
408 void writeRelation(const Relation
&R
, llvm::raw_ostream
&OS
) {
409 OS
<< R
.Subject
.raw();
410 OS
.write(static_cast<uint8_t>(R
.Predicate
));
411 OS
<< R
.Object
.raw();
414 Relation
readRelation(Reader
&Data
) {
415 SymbolID Subject
= Data
.consumeID();
416 RelationKind Predicate
= static_cast<RelationKind
>(Data
.consume8());
417 SymbolID Object
= Data
.consumeID();
418 return {Subject
, Predicate
, Object
};
421 struct InternedCompileCommand
{
422 llvm::StringRef Directory
;
423 std::vector
<llvm::StringRef
> CommandLine
;
426 void writeCompileCommand(const InternedCompileCommand
&Cmd
,
427 const StringTableOut
&Strings
,
428 llvm::raw_ostream
&CmdOS
) {
429 writeVar(Strings
.index(Cmd
.Directory
), CmdOS
);
430 writeVar(Cmd
.CommandLine
.size(), CmdOS
);
431 for (llvm::StringRef C
: Cmd
.CommandLine
)
432 writeVar(Strings
.index(C
), CmdOS
);
435 InternedCompileCommand
436 readCompileCommand(Reader CmdReader
, llvm::ArrayRef
<llvm::StringRef
> Strings
) {
437 InternedCompileCommand Cmd
;
438 Cmd
.Directory
= CmdReader
.consumeString(Strings
);
439 if (!CmdReader
.consumeSize(Cmd
.CommandLine
))
441 for (llvm::StringRef
&C
: Cmd
.CommandLine
)
442 C
= CmdReader
.consumeString(Strings
);
447 // A file is a RIFF chunk with type 'CdIx'.
448 // It contains the sections:
449 // - meta: version number
450 // - srcs: information related to include graph
451 // - stri: string table
453 // - refs: references to symbols
455 // The current versioning scheme is simple - non-current versions are rejected.
456 // If you make a breaking change, bump this version number to invalidate stored
457 // data. Later we may want to support some backward compatibility.
// Format version: written into the "meta" chunk and compared against the
// value found there when reading.
static constexpr uint32_t Version = 17;
460 llvm::Expected
<IndexFileIn
> readRIFF(llvm::StringRef Data
,
461 SymbolOrigin Origin
) {
462 auto RIFF
= riff::readFile(Data
);
464 return RIFF
.takeError();
465 if (RIFF
->Type
!= riff::fourCC("CdIx"))
466 return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF
->Type
));
467 llvm::StringMap
<llvm::StringRef
> Chunks
;
468 for (const auto &Chunk
: RIFF
->Chunks
)
469 Chunks
.try_emplace(llvm::StringRef(Chunk
.ID
.data(), Chunk
.ID
.size()),
472 if (!Chunks
.count("meta"))
473 return error("missing meta chunk");
474 Reader
Meta(Chunks
.lookup("meta"));
475 auto SeenVersion
= Meta
.consume32();
476 if (SeenVersion
!= Version
)
477 return error("wrong version: want {0}, got {1}", Version
, SeenVersion
);
479 // meta chunk is checked above, as we prefer the "version mismatch" error.
480 for (llvm::StringRef RequiredChunk
: {"stri"})
481 if (!Chunks
.count(RequiredChunk
))
482 return error("missing required chunk {0}", RequiredChunk
);
484 auto Strings
= readStringTable(Chunks
.lookup("stri"));
486 return Strings
.takeError();
489 if (Chunks
.count("srcs")) {
490 Reader
SrcsReader(Chunks
.lookup("srcs"));
491 Result
.Sources
.emplace();
492 while (!SrcsReader
.eof()) {
493 auto IGN
= readIncludeGraphNode(SrcsReader
, Strings
->Strings
);
494 auto Entry
= Result
.Sources
->try_emplace(IGN
.URI
).first
;
495 Entry
->getValue() = std::move(IGN
);
496 // We change all the strings inside the structure to point at the keys in
497 // the map, since it is the only copy of the string that's going to live.
498 Entry
->getValue().URI
= Entry
->getKey();
499 for (auto &Include
: Entry
->getValue().DirectIncludes
)
500 Include
= Result
.Sources
->try_emplace(Include
).first
->getKey();
502 if (SrcsReader
.err())
503 return error("malformed or truncated include uri");
506 if (Chunks
.count("symb")) {
507 Reader
SymbolReader(Chunks
.lookup("symb"));
508 SymbolSlab::Builder Symbols
;
509 while (!SymbolReader
.eof())
510 Symbols
.insert(readSymbol(SymbolReader
, Strings
->Strings
, Origin
));
511 if (SymbolReader
.err())
512 return error("malformed or truncated symbol");
513 Result
.Symbols
= std::move(Symbols
).build();
515 if (Chunks
.count("refs")) {
516 Reader
RefsReader(Chunks
.lookup("refs"));
517 RefSlab::Builder Refs
;
518 while (!RefsReader
.eof()) {
519 auto RefsBundle
= readRefs(RefsReader
, Strings
->Strings
);
520 for (const auto &Ref
: RefsBundle
.second
) // FIXME: bulk insert?
521 Refs
.insert(RefsBundle
.first
, Ref
);
523 if (RefsReader
.err())
524 return error("malformed or truncated refs");
525 Result
.Refs
= std::move(Refs
).build();
527 if (Chunks
.count("rela")) {
528 Reader
RelationsReader(Chunks
.lookup("rela"));
529 RelationSlab::Builder Relations
;
530 while (!RelationsReader
.eof())
531 Relations
.insert(readRelation(RelationsReader
));
532 if (RelationsReader
.err())
533 return error("malformed or truncated relations");
534 Result
.Relations
= std::move(Relations
).build();
536 if (Chunks
.count("cmdl")) {
537 Reader
CmdReader(Chunks
.lookup("cmdl"));
538 InternedCompileCommand Cmd
=
539 readCompileCommand(CmdReader
, Strings
->Strings
);
541 return error("malformed or truncated commandline section");
542 Result
.Cmd
.emplace();
543 Result
.Cmd
->Directory
= std::string(Cmd
.Directory
);
544 Result
.Cmd
->CommandLine
.reserve(Cmd
.CommandLine
.size());
545 for (llvm::StringRef C
: Cmd
.CommandLine
)
546 Result
.Cmd
->CommandLine
.emplace_back(C
);
548 return std::move(Result
);
551 template <class Callback
>
552 void visitStrings(IncludeGraphNode
&IGN
, const Callback
&CB
) {
554 for (llvm::StringRef
&Include
: IGN
.DirectIncludes
)
558 void writeRIFF(const IndexFileOut
&Data
, llvm::raw_ostream
&OS
) {
559 assert(Data
.Symbols
&& "An index file without symbols makes no sense!");
561 RIFF
.Type
= riff::fourCC("CdIx");
563 llvm::SmallString
<4> Meta
;
565 llvm::raw_svector_ostream
MetaOS(Meta
);
566 write32(Version
, MetaOS
);
568 RIFF
.Chunks
.push_back({riff::fourCC("meta"), Meta
});
570 StringTableOut Strings
;
571 std::vector
<Symbol
> Symbols
;
572 for (const auto &Sym
: *Data
.Symbols
) {
573 Symbols
.emplace_back(Sym
);
574 visitStrings(Symbols
.back(),
575 [&](llvm::StringRef
&S
) { Strings
.intern(S
); });
577 std::vector
<IncludeGraphNode
> Sources
;
579 for (const auto &Source
: *Data
.Sources
) {
580 Sources
.push_back(Source
.getValue());
581 visitStrings(Sources
.back(),
582 [&](llvm::StringRef
&S
) { Strings
.intern(S
); });
585 std::vector
<std::pair
<SymbolID
, std::vector
<Ref
>>> Refs
;
587 for (const auto &Sym
: *Data
.Refs
) {
588 Refs
.emplace_back(Sym
);
589 for (auto &Ref
: Refs
.back().second
) {
590 llvm::StringRef File
= Ref
.Location
.FileURI
;
591 Strings
.intern(File
);
592 Ref
.Location
.FileURI
= File
.data();
597 std::vector
<Relation
> Relations
;
598 if (Data
.Relations
) {
599 for (const auto &Relation
: *Data
.Relations
) {
600 Relations
.emplace_back(Relation
);
601 // No strings to be interned in relations.
605 InternedCompileCommand InternedCmd
;
607 InternedCmd
.CommandLine
.reserve(Data
.Cmd
->CommandLine
.size());
608 InternedCmd
.Directory
= Data
.Cmd
->Directory
;
609 Strings
.intern(InternedCmd
.Directory
);
610 for (llvm::StringRef C
: Data
.Cmd
->CommandLine
) {
611 InternedCmd
.CommandLine
.emplace_back(C
);
612 Strings
.intern(InternedCmd
.CommandLine
.back());
616 std::string StringSection
;
618 llvm::raw_string_ostream
StringOS(StringSection
);
619 Strings
.finalize(StringOS
);
621 RIFF
.Chunks
.push_back({riff::fourCC("stri"), StringSection
});
623 std::string SymbolSection
;
625 llvm::raw_string_ostream
SymbolOS(SymbolSection
);
626 for (const auto &Sym
: Symbols
)
627 writeSymbol(Sym
, Strings
, SymbolOS
);
629 RIFF
.Chunks
.push_back({riff::fourCC("symb"), SymbolSection
});
631 std::string RefsSection
;
634 llvm::raw_string_ostream
RefsOS(RefsSection
);
635 for (const auto &Sym
: Refs
)
636 writeRefs(Sym
.first
, Sym
.second
, Strings
, RefsOS
);
638 RIFF
.Chunks
.push_back({riff::fourCC("refs"), RefsSection
});
641 std::string RelationSection
;
642 if (Data
.Relations
) {
644 llvm::raw_string_ostream RelationOS
{RelationSection
};
645 for (const auto &Relation
: Relations
)
646 writeRelation(Relation
, RelationOS
);
648 RIFF
.Chunks
.push_back({riff::fourCC("rela"), RelationSection
});
651 std::string SrcsSection
;
654 llvm::raw_string_ostream
SrcsOS(SrcsSection
);
655 for (const auto &SF
: Sources
)
656 writeIncludeGraphNode(SF
, Strings
, SrcsOS
);
658 RIFF
.Chunks
.push_back({riff::fourCC("srcs"), SrcsSection
});
661 std::string CmdlSection
;
664 llvm::raw_string_ostream
CmdOS(CmdlSection
);
665 writeCompileCommand(InternedCmd
, Strings
, CmdOS
);
667 RIFF
.Chunks
.push_back({riff::fourCC("cmdl"), CmdlSection
});
675 // Defined in YAMLSerialization.cpp.
676 void writeYAML(const IndexFileOut
&, llvm::raw_ostream
&);
677 llvm::Expected
<IndexFileIn
> readYAML(llvm::StringRef
, SymbolOrigin Origin
);
679 llvm::raw_ostream
&operator<<(llvm::raw_ostream
&OS
, const IndexFileOut
&O
) {
681 case IndexFileFormat::RIFF
:
684 case IndexFileFormat::YAML
:
691 llvm::Expected
<IndexFileIn
> readIndexFile(llvm::StringRef Data
,
692 SymbolOrigin Origin
) {
693 if (Data
.startswith("RIFF")) {
694 return readRIFF(Data
, Origin
);
696 if (auto YAMLContents
= readYAML(Data
, Origin
)) {
697 return std::move(*YAMLContents
);
699 return error("Not a RIFF file and failed to parse as YAML: {0}",
700 YAMLContents
.takeError());
704 std::unique_ptr
<SymbolIndex
> loadIndex(llvm::StringRef SymbolFilename
,
705 SymbolOrigin Origin
, bool UseDex
) {
706 trace::Span
OverallTracer("LoadIndex");
707 auto Buffer
= llvm::MemoryBuffer::getFile(SymbolFilename
);
709 elog("Can't open {0}: {1}", SymbolFilename
, Buffer
.getError().message());
715 RelationSlab Relations
;
717 trace::Span
Tracer("ParseIndex");
718 if (auto I
= readIndexFile(Buffer
->get()->getBuffer(), Origin
)) {
720 Symbols
= std::move(*I
->Symbols
);
722 Refs
= std::move(*I
->Refs
);
724 Relations
= std::move(*I
->Relations
);
726 elog("Bad index file: {0}", I
.takeError());
731 size_t NumSym
= Symbols
.size();
732 size_t NumRefs
= Refs
.numRefs();
733 size_t NumRelations
= Relations
.size();
735 trace::Span
Tracer("BuildIndex");
736 auto Index
= UseDex
? dex::Dex::build(std::move(Symbols
), std::move(Refs
),
737 std::move(Relations
))
738 : MemIndex::build(std::move(Symbols
), std::move(Refs
),
739 std::move(Relations
));
740 vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
741 " - number of symbols: {3}\n"
742 " - number of refs: {4}\n"
743 " - number of relations: {5}",
744 UseDex
? "Dex" : "MemIndex", SymbolFilename
,
745 Index
->estimateMemoryUsage(), NumSym
, NumRefs
, NumRelations
);
749 } // namespace clangd