[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / clang-tools-extra / clangd / unittests / SerializationTests.cpp
blobae1914f3033100f367d7c8c9e8031c4c9a9dfa69
1 //===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "Headers.h"
10 #include "RIFF.h"
11 #include "index/Serialization.h"
12 #include "support/Logger.h"
13 #include "clang/Tooling/CompilationDatabase.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/Support/Compression.h"
16 #include "llvm/Support/Error.h"
17 #include "llvm/Support/ScopedPrinter.h"
18 #include "gmock/gmock.h"
19 #include "gtest/gtest.h"
20 #ifdef LLVM_ON_UNIX
21 #include <sys/resource.h>
22 #endif
24 using ::testing::ElementsAre;
25 using ::testing::Pair;
26 using ::testing::UnorderedElementsAre;
27 using ::testing::UnorderedElementsAreArray;
29 namespace clang {
30 namespace clangd {
31 namespace {
// Shared fixture for the tests in this file: a YAML index holding two !Symbol
// documents (Foo1 and Foo2), one !Refs document, one !Relations document, a
// !Cmd compile command and a !Source include-graph node. The tests assert on
// the exact values spelled out here, so the text is runtime data, not prose.
33 const char *YAML = R"(
34 ---
35 !Symbol
36 ID: 057557CEBF6E6B2D
37 Name: 'Foo1'
38 Scope: 'clang::'
39 SymInfo:
40 Kind: Function
41 Lang: Cpp
42 CanonicalDeclaration:
43 FileURI: file:///path/foo.h
44 Start:
45 Line: 1
46 Column: 0
47 End:
48 Line: 1
49 Column: 1
50 Flags: 129
51 Documentation: 'Foo doc'
52 ReturnType: 'int'
53 IncludeHeaders:
54 - Header: 'include1'
55 References: 7
56 - Header: 'include2'
57 References: 3
58 ...
59 ---
60 !Symbol
61 ID: 057557CEBF6E6B2E
62 Name: 'Foo2'
63 Scope: 'clang::'
64 SymInfo:
65 Kind: Function
66 Lang: Cpp
67 CanonicalDeclaration:
68 FileURI: file:///path/bar.h
69 Start:
70 Line: 1
71 Column: 0
72 End:
73 Line: 1
74 Column: 1
75 Flags: 2
76 Signature: '-sig'
77 CompletionSnippetSuffix: '-snippet'
78 ...
79 !Refs
80 ID: 057557CEBF6E6B2D
81 References:
82 - Kind: 4
83 Location:
84 FileURI: file:///path/foo.cc
85 Start:
86 Line: 5
87 Column: 3
88 End:
89 Line: 5
90 Column: 8
91 ...
92 --- !Relations
93 Subject:
94 ID: 6481EE7AF2841756
95 Predicate: 0
96 Object:
97 ID: 6512AEC512EA3A2D
98 ...
99 --- !Cmd
100 Directory: 'testdir'
101 CommandLine:
102 - 'cmd1'
103 - 'cmd2'
105 --- !Source
106 URI: 'file:///path/source1.cpp'
107 Flags: 1
108 Digest: EED8F5EAF25C453C
109 DirectIncludes:
110 - 'file:///path/inc1.h'
111 - 'file:///path/inc2.h'
115 MATCHER_P(id, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
116 MATCHER_P(qName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
117 MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") {
118 return (arg.IncludeHeader == IncludeHeader) && (arg.References == References);
121 auto readIndexFile(llvm::StringRef Text) {
122 return readIndexFile(Text, SymbolOrigin::Static);
125 TEST(SerializationTest, NoCrashOnEmptyYAML) {
126 EXPECT_TRUE(bool(readIndexFile("")));
129 TEST(SerializationTest, YAMLConversions) {
130 auto ParsedYAML = readIndexFile(YAML);
131 ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError();
132 ASSERT_TRUE(bool(ParsedYAML->Symbols));
133 EXPECT_THAT(
134 *ParsedYAML->Symbols,
135 UnorderedElementsAre(id("057557CEBF6E6B2D"), id("057557CEBF6E6B2E")));
137 auto Sym1 = *ParsedYAML->Symbols->find(
138 cantFail(SymbolID::fromStr("057557CEBF6E6B2D")));
139 auto Sym2 = *ParsedYAML->Symbols->find(
140 cantFail(SymbolID::fromStr("057557CEBF6E6B2E")));
142 EXPECT_THAT(Sym1, qName("clang::Foo1"));
143 EXPECT_EQ(Sym1.Signature, "");
144 EXPECT_EQ(Sym1.Documentation, "Foo doc");
145 EXPECT_EQ(Sym1.ReturnType, "int");
146 EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h");
147 EXPECT_EQ(Sym1.Origin, SymbolOrigin::Static);
148 EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129);
149 EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion);
150 EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated);
151 EXPECT_THAT(Sym1.IncludeHeaders,
152 UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u),
153 IncludeHeaderWithRef("include2", 3u)));
155 EXPECT_THAT(Sym2, qName("clang::Foo2"));
156 EXPECT_EQ(Sym2.Signature, "-sig");
157 EXPECT_EQ(Sym2.ReturnType, "");
158 EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI),
159 "file:///path/bar.h");
160 EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
161 EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
163 ASSERT_TRUE(bool(ParsedYAML->Refs));
164 EXPECT_THAT(
165 *ParsedYAML->Refs,
166 UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")),
167 ::testing::SizeIs(1))));
168 auto Ref1 = ParsedYAML->Refs->begin()->second.front();
169 EXPECT_EQ(Ref1.Kind, RefKind::Reference);
170 EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc");
172 SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756"));
173 SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D"));
174 ASSERT_TRUE(bool(ParsedYAML->Relations));
175 EXPECT_THAT(
176 *ParsedYAML->Relations,
177 UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived}));
179 ASSERT_TRUE(bool(ParsedYAML->Cmd));
180 auto &Cmd = *ParsedYAML->Cmd;
181 ASSERT_EQ(Cmd.Directory, "testdir");
182 EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1", "cmd2"));
184 ASSERT_TRUE(bool(ParsedYAML->Sources));
185 const auto *URI = "file:///path/source1.cpp";
186 ASSERT_TRUE(ParsedYAML->Sources->count(URI));
187 auto IGNDeserialized = ParsedYAML->Sources->lookup(URI);
188 EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C");
189 EXPECT_THAT(IGNDeserialized.DirectIncludes,
190 ElementsAre("file:///path/inc1.h", "file:///path/inc2.h"));
191 EXPECT_EQ(IGNDeserialized.URI, URI);
192 EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1));
195 std::vector<std::string> yamlFromSymbols(const SymbolSlab &Slab) {
196 std::vector<std::string> Result;
197 for (const auto &Sym : Slab)
198 Result.push_back(toYAML(Sym));
199 return Result;
201 std::vector<std::string> yamlFromRefs(const RefSlab &Slab) {
202 std::vector<std::string> Result;
203 for (const auto &Refs : Slab)
204 Result.push_back(toYAML(Refs));
205 return Result;
208 std::vector<std::string> yamlFromRelations(const RelationSlab &Slab) {
209 std::vector<std::string> Result;
210 for (const auto &Rel : Slab)
211 Result.push_back(toYAML(Rel));
212 return Result;
215 TEST(SerializationTest, BinaryConversions) {
216 auto In = readIndexFile(YAML);
217 EXPECT_TRUE(bool(In)) << In.takeError();
219 // Write to binary format, and parse again.
220 IndexFileOut Out(*In);
221 Out.Format = IndexFileFormat::RIFF;
222 std::string Serialized = llvm::to_string(Out);
224 auto In2 = readIndexFile(Serialized);
225 ASSERT_TRUE(bool(In2)) << In2.takeError();
226 ASSERT_TRUE(In2->Symbols);
227 ASSERT_TRUE(In2->Refs);
228 ASSERT_TRUE(In2->Relations);
230 // Assert the YAML serializations match, for nice comparisons and diffs.
231 EXPECT_THAT(yamlFromSymbols(*In2->Symbols),
232 UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols)));
233 EXPECT_THAT(yamlFromRefs(*In2->Refs),
234 UnorderedElementsAreArray(yamlFromRefs(*In->Refs)));
235 EXPECT_THAT(yamlFromRelations(*In2->Relations),
236 UnorderedElementsAreArray(yamlFromRelations(*In->Relations)));
239 TEST(SerializationTest, SrcsTest) {
240 auto In = readIndexFile(YAML);
241 EXPECT_TRUE(bool(In)) << In.takeError();
243 std::string TestContent("TestContent");
244 IncludeGraphNode IGN;
245 IGN.Digest = digest(TestContent);
246 IGN.DirectIncludes = {"inc1", "inc2"};
247 IGN.URI = "URI";
248 IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU;
249 IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
250 IncludeGraph Sources;
251 Sources[IGN.URI] = IGN;
252 // Write to binary format, and parse again.
253 IndexFileOut Out(*In);
254 Out.Format = IndexFileFormat::RIFF;
255 Out.Sources = &Sources;
257 std::string Serialized = llvm::to_string(Out);
259 auto In = readIndexFile(Serialized);
260 ASSERT_TRUE(bool(In)) << In.takeError();
261 ASSERT_TRUE(In->Symbols);
262 ASSERT_TRUE(In->Refs);
263 ASSERT_TRUE(In->Sources);
264 ASSERT_TRUE(In->Sources->count(IGN.URI));
265 // Assert the YAML serializations match, for nice comparisons and diffs.
266 EXPECT_THAT(yamlFromSymbols(*In->Symbols),
267 UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols)));
268 EXPECT_THAT(yamlFromRefs(*In->Refs),
269 UnorderedElementsAreArray(yamlFromRefs(*In->Refs)));
270 auto IGNDeserialized = In->Sources->lookup(IGN.URI);
271 EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
272 EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
273 EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
274 EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags);
278 TEST(SerializationTest, CmdlTest) {
279 auto In = readIndexFile(YAML);
280 EXPECT_TRUE(bool(In)) << In.takeError();
282 tooling::CompileCommand Cmd;
283 Cmd.Directory = "testdir";
284 Cmd.CommandLine.push_back("cmd1");
285 Cmd.CommandLine.push_back("cmd2");
286 Cmd.Filename = "ignored";
287 Cmd.Heuristic = "ignored";
288 Cmd.Output = "ignored";
290 IndexFileOut Out(*In);
291 Out.Format = IndexFileFormat::RIFF;
292 Out.Cmd = &Cmd;
294 std::string Serialized = llvm::to_string(Out);
296 auto In = readIndexFile(Serialized);
297 ASSERT_TRUE(bool(In)) << In.takeError();
298 ASSERT_TRUE(In->Cmd);
300 const tooling::CompileCommand &SerializedCmd = *In->Cmd;
301 EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine);
302 EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory);
303 EXPECT_NE(SerializedCmd.Filename, Cmd.Filename);
304 EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic);
305 EXPECT_NE(SerializedCmd.Output, Cmd.Output);
309 // rlimit is part of POSIX. RLIMIT_AS does not exist in OpenBSD.
310 // Sanitizers use a lot of address space, so we can't apply strict limits.
311 #if LLVM_ON_UNIX && defined(RLIMIT_AS) && !LLVM_ADDRESS_SANITIZER_BUILD && \
312 !LLVM_MEMORY_SANITIZER_BUILD
313 class ScopedMemoryLimit {
314 struct rlimit OriginalLimit;
315 bool Succeeded = false;
317 public:
318 ScopedMemoryLimit(rlim_t Bytes) {
319 if (!getrlimit(RLIMIT_AS, &OriginalLimit)) {
320 struct rlimit NewLimit = OriginalLimit;
321 NewLimit.rlim_cur = Bytes;
322 Succeeded = !setrlimit(RLIMIT_AS, &NewLimit);
324 if (!Succeeded)
325 log("Failed to set rlimit");
328 ~ScopedMemoryLimit() {
329 if (Succeeded)
330 setrlimit(RLIMIT_AS, &OriginalLimit);
333 #else
334 class ScopedMemoryLimit {
335 public:
336 ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); }
338 #endif
340 // Test that our deserialization detects invalid array sizes without allocating.
341 // If this detection fails, the test should allocate a huge array and crash.
342 TEST(SerializationTest, NoCrashOnBadArraySize) {
343 // This test is tricky because we need to construct a subtly invalid file.
344 // First, create a valid serialized file.
345 auto In = readIndexFile(YAML);
346 ASSERT_FALSE(!In) << In.takeError();
347 IndexFileOut Out(*In);
348 Out.Format = IndexFileFormat::RIFF;
349 std::string Serialized = llvm::to_string(Out);
351 // Low-level parse it again and find the `srcs` chunk we're going to corrupt.
352 auto Parsed = riff::readFile(Serialized);
353 ASSERT_FALSE(!Parsed) << Parsed.takeError();
354 auto Srcs = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
355 return C.ID == riff::fourCC("srcs");
357 ASSERT_NE(Srcs, Parsed->Chunks.end());
359 // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one.
360 // The node has:
361 // - 1 byte: flags (1)
362 // - varint(stringID): URI
363 // - 8 byte: file digest
364 // - varint: DirectIncludes.length
365 // - repeated varint(stringID): DirectIncludes
366 // We want to set DirectIncludes.length to a huge number.
367 // The offset isn't trivial to find, so we use the file digest.
368 std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
369 unsigned Pos = Srcs->Data.find_first_of(FileDigest);
370 ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest";
371 Pos += FileDigest.size();
373 // Varints are little-endian base-128 numbers, where the top-bit of each byte
374 // indicates whether there are more. ffffffff0f -> 0xffffffff.
375 std::string CorruptSrcs =
376 (Srcs->Data.take_front(Pos) + llvm::fromHex("ffffffff0f") +
377 "some_random_garbage")
378 .str();
379 Srcs->Data = CorruptSrcs;
381 // Try to crash rather than hang on large allocation.
382 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
384 std::string CorruptFile = llvm::to_string(*Parsed);
385 auto CorruptParsed = readIndexFile(CorruptFile);
386 ASSERT_TRUE(!CorruptParsed);
387 EXPECT_EQ(llvm::toString(CorruptParsed.takeError()),
388 "malformed or truncated include uri");
391 // Check we detect invalid string table size size without allocating it first.
392 // If this detection fails, the test should allocate a huge array and crash.
393 TEST(SerializationTest, NoCrashOnBadStringTableSize) {
394 if (!llvm::compression::zlib::isAvailable()) {
395 log("skipping test, no zlib");
396 return;
399 // First, create a valid serialized file.
400 auto In = readIndexFile(YAML);
401 ASSERT_FALSE(!In) << In.takeError();
402 IndexFileOut Out(*In);
403 Out.Format = IndexFileFormat::RIFF;
404 std::string Serialized = llvm::to_string(Out);
406 // Low-level parse it again, we're going to replace the `stri` chunk.
407 auto Parsed = riff::readFile(Serialized);
408 ASSERT_FALSE(!Parsed) << Parsed.takeError();
409 auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
410 return C.ID == riff::fourCC("stri");
412 ASSERT_NE(Stri, Parsed->Chunks.end());
414 // stri consists of an 8 byte uncompressed-size, and then compressed data.
415 // We'll claim our small amount of data expands to 4GB
416 std::string CorruptStri =
417 (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str();
418 Stri->Data = CorruptStri;
419 std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
421 // Try to crash rather than hang on large allocation.
422 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
424 std::string CorruptFile = llvm::to_string(*Parsed);
425 auto CorruptParsed = readIndexFile(CorruptFile);
426 ASSERT_TRUE(!CorruptParsed);
427 EXPECT_THAT(llvm::toString(CorruptParsed.takeError()),
428 testing::HasSubstr("bytes is implausible"));
431 } // namespace
432 } // namespace clangd
433 } // namespace clang