1 //===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 #include "index/Serialization.h"
12 #include "support/Logger.h"
13 #include "clang/Tooling/CompilationDatabase.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
16 #include "llvm/Support/Compression.h"
17 #include "llvm/Support/Error.h"
18 #include "llvm/Support/ScopedPrinter.h"
19 #include "gmock/gmock.h"
20 #include "gtest/gtest.h"
22 #include <sys/resource.h>
25 using ::testing::ElementsAre
;
26 using ::testing::Pair
;
27 using ::testing::UnorderedElementsAre
;
28 using ::testing::UnorderedElementsAreArray
;
// YAML index document used as the shared fixture: the tests in this file parse
// it with readIndexFile(YAML).
// NOTE(review): only a handful of the document's lines are visible in this
// view, so its full structure cannot be described from here.
34 const char *YAML
= R
"(
44 FileURI: file:///path/foo.h
52 Documentation: 'Foo doc'
57 Directives: [ Include ]
60 Directives: [ Import ]
63 Directives: [ Include, Import ]
77 FileURI: file:///path/bar.h
86 CompletionSnippetSuffix: '-snippet'
93 FileURI: file:///path/foo.cc
115 URI: 'file:///path/source1.cpp'
117 Digest: EED8F5EAF25C453C
119 - 'file:///path/inc1.h'
120 - 'file:///path/inc2.h'
124 MATCHER_P(id
, I
, "") { return arg
.ID
== cantFail(SymbolID::fromStr(I
)); }
125 MATCHER_P(qName
, Name
, "") { return (arg
.Scope
+ arg
.Name
).str() == Name
; }
126 MATCHER_P3(IncludeHeaderWithRefAndDirectives
, IncludeHeader
, References
,
127 SupportedDirectives
, "") {
128 return (arg
.IncludeHeader
== IncludeHeader
) &&
129 (arg
.References
== References
) &&
130 (arg
.SupportedDirectives
== SupportedDirectives
);
133 auto readIndexFile(llvm::StringRef Text
) {
134 return readIndexFile(Text
, SymbolOrigin::Static
);
137 TEST(SerializationTest
, NoCrashOnEmptyYAML
) {
138 EXPECT_TRUE(bool(readIndexFile("")));
// Parses the YAML fixture and checks the deserialized symbols, refs,
// relations, compile command and include-graph sources field by field.
// NOTE(review): the openers of some matcher statements (the call that takes
// the "subject, matcher" operands) are not visible in this view; the comments
// below describe only the operands that are visible.
141 TEST(SerializationTest
, YAMLConversions
) {
// Parse the fixture. Both ASSERTs stop the test before the result or its
// symbol slab is dereferenced.
142 auto ParsedYAML
= readIndexFile(YAML
);
143 ASSERT_TRUE(bool(ParsedYAML
)) << ParsedYAML
.takeError();
144 ASSERT_TRUE(bool(ParsedYAML
->Symbols
));
// Operands: the parsed symbols, matched against exactly these two IDs in any
// order.
146 *ParsedYAML
->Symbols
,
147 UnorderedElementsAre(id("057557CEBF6E6B2D"), id("057557CEBF6E6B2E")));
// Copy both symbols out by ID for the per-field checks below. The find()
// results are dereferenced without an end() check.
149 auto Sym1
= *ParsedYAML
->Symbols
->find(
150 cantFail(SymbolID::fromStr("057557CEBF6E6B2D")));
151 auto Sym2
= *ParsedYAML
->Symbols
->find(
152 cantFail(SymbolID::fromStr("057557CEBF6E6B2E")));
// First symbol: name, text fields, declaration location and origin.
154 EXPECT_THAT(Sym1
, qName("clang::Foo1"));
155 EXPECT_EQ(Sym1
.Signature
, "");
156 EXPECT_EQ(Sym1
.Documentation
, "Foo doc");
157 EXPECT_EQ(Sym1
.ReturnType
, "int");
158 EXPECT_EQ(StringRef(Sym1
.CanonicalDeclaration
.FileURI
), "file:///path/foo.h");
159 EXPECT_EQ(Sym1
.Origin
, SymbolOrigin::Static
);
// Raw flag byte first, then the two individual flags this test cares about:
// IndexedForCodeCompletion must be set and Deprecated must be clear.
160 EXPECT_EQ(static_cast<uint8_t>(Sym1
.Flags
), 129);
161 EXPECT_TRUE(Sym1
.Flags
& Symbol::IndexedForCodeCompletion
);
162 EXPECT_FALSE(Sym1
.Flags
& Symbol::Deprecated
);
// Expected include headers, each given as (IncludeHeader, References,
// SupportedDirectives); see IncludeHeaderWithRefAndDirectives.
165 UnorderedElementsAre(
166 IncludeHeaderWithRefAndDirectives("include1", 7u, Symbol::Include
),
167 IncludeHeaderWithRefAndDirectives("include2", 3u, Symbol::Import
),
168 IncludeHeaderWithRefAndDirectives("include3", 2u,
169 Symbol::Include
| Symbol::Import
),
170 IncludeHeaderWithRefAndDirectives("include4", 1u, Symbol::Invalid
)));
// Second symbol: the flag expectations are the opposite of the first symbol's.
172 EXPECT_THAT(Sym2
, qName("clang::Foo2"));
173 EXPECT_EQ(Sym2
.Signature
, "-sig");
174 EXPECT_EQ(Sym2
.ReturnType
, "");
175 EXPECT_EQ(llvm::StringRef(Sym2
.CanonicalDeclaration
.FileURI
),
176 "file:///path/bar.h");
177 EXPECT_FALSE(Sym2
.Flags
& Symbol::IndexedForCodeCompletion
);
178 EXPECT_TRUE(Sym2
.Flags
& Symbol::Deprecated
);
// Refs: the matcher below expects a single entry keyed by the first symbol's
// ID and holding one ref; that ref is then inspected.
180 ASSERT_TRUE(bool(ParsedYAML
->Refs
));
183 UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")),
184 ::testing::SizeIs(1))));
185 auto Ref1
= ParsedYAML
->Refs
->begin()->second
.front();
186 EXPECT_EQ(Ref1
.Kind
, RefKind::Reference
);
187 EXPECT_EQ(StringRef(Ref1
.Location
.FileURI
), "file:///path/foo.cc");
// Relations: the matcher below expects exactly one Base --BaseOf--> Derived
// relation.
189 SymbolID Base
= cantFail(SymbolID::fromStr("6481EE7AF2841756"));
190 SymbolID Derived
= cantFail(SymbolID::fromStr("6512AEC512EA3A2D"));
191 ASSERT_TRUE(bool(ParsedYAML
->Relations
));
193 *ParsedYAML
->Relations
,
194 UnorderedElementsAre(Relation
{Base
, RelationKind::BaseOf
, Derived
}));
// Compile command stored in the index file.
196 ASSERT_TRUE(bool(ParsedYAML
->Cmd
));
197 auto &Cmd
= *ParsedYAML
->Cmd
;
198 ASSERT_EQ(Cmd
.Directory
, "testdir");
199 EXPECT_THAT(Cmd
.CommandLine
, ElementsAre("cmd1", "cmd2"));
// Include graph: look up the one source entry by URI and check its digest,
// direct includes, URI and flags.
201 ASSERT_TRUE(bool(ParsedYAML
->Sources
));
202 const auto *URI
= "file:///path/source1.cpp";
203 ASSERT_TRUE(ParsedYAML
->Sources
->count(URI
));
204 auto IGNDeserialized
= ParsedYAML
->Sources
->lookup(URI
);
205 EXPECT_EQ(llvm::toHex(IGNDeserialized
.Digest
), "EED8F5EAF25C453C");
206 EXPECT_THAT(IGNDeserialized
.DirectIncludes
,
207 ElementsAre("file:///path/inc1.h", "file:///path/inc2.h"));
208 EXPECT_EQ(IGNDeserialized
.URI
, URI
);
209 EXPECT_EQ(IGNDeserialized
.Flags
, IncludeGraphNode::SourceFlag(1));
212 std::vector
<std::string
> yamlFromSymbols(const SymbolSlab
&Slab
) {
213 std::vector
<std::string
> Result
;
214 for (const auto &Sym
: Slab
)
215 Result
.push_back(toYAML(Sym
));
218 std::vector
<std::string
> yamlFromRefs(const RefSlab
&Slab
) {
219 std::vector
<std::string
> Result
;
220 for (const auto &Refs
: Slab
)
221 Result
.push_back(toYAML(Refs
));
225 std::vector
<std::string
> yamlFromRelations(const RelationSlab
&Slab
) {
226 std::vector
<std::string
> Result
;
227 for (const auto &Rel
: Slab
)
228 Result
.push_back(toYAML(Rel
));
232 TEST(SerializationTest
, BinaryConversions
) {
233 auto In
= readIndexFile(YAML
);
234 EXPECT_TRUE(bool(In
)) << In
.takeError();
236 // Write to binary format, and parse again.
237 IndexFileOut
Out(*In
);
238 Out
.Format
= IndexFileFormat::RIFF
;
239 std::string Serialized
= llvm::to_string(Out
);
241 auto In2
= readIndexFile(Serialized
);
242 ASSERT_TRUE(bool(In2
)) << In2
.takeError();
243 ASSERT_TRUE(In2
->Symbols
);
244 ASSERT_TRUE(In2
->Refs
);
245 ASSERT_TRUE(In2
->Relations
);
247 // Assert the YAML serializations match, for nice comparisons and diffs.
248 EXPECT_THAT(yamlFromSymbols(*In2
->Symbols
),
249 UnorderedElementsAreArray(yamlFromSymbols(*In
->Symbols
)));
250 EXPECT_THAT(yamlFromRefs(*In2
->Refs
),
251 UnorderedElementsAreArray(yamlFromRefs(*In
->Refs
)));
252 EXPECT_THAT(yamlFromRelations(*In2
->Relations
),
253 UnorderedElementsAreArray(yamlFromRelations(*In
->Relations
)));
// Serializes the fixture together with a hand-built include graph (Sources)
// to the RIFF format, reads it back, and checks that the include-graph node
// survives.
// NOTE(review): the parse result below is only EXPECT-checked and then
// dereferenced by `Out(*In)`; the NoCrash tests use ASSERT_FALSE(!In) for the
// same step.
256 TEST(SerializationTest
, SrcsTest
) {
257 auto In
= readIndexFile(YAML
);
258 EXPECT_TRUE(bool(In
)) << In
.takeError();
// Build one include-graph node by hand.
// NOTE(review): no assignment to IGN.URI is visible in this view, yet the URI
// is used as the map key and in the lookups below — confirm it is set.
260 std::string
TestContent("TestContent");
261 IncludeGraphNode IGN
;
262 IGN
.Digest
= digest(TestContent
);
263 IGN
.DirectIncludes
= {"inc1", "inc2"};
265 IGN
.Flags
|= IncludeGraphNode::SourceFlag::IsTU
;
266 IGN
.Flags
|= IncludeGraphNode::SourceFlag::HadErrors
;
267 IncludeGraph Sources
;
268 Sources
[IGN
.URI
] = IGN
;
269 // Write to binary format, and parse again.
// Out is built from the parsed fixture; its Sources is pointed at the
// hand-built graph above.
270 IndexFileOut
Out(*In
);
271 Out
.Format
= IndexFileFormat::RIFF
;
272 Out
.Sources
= &Sources
;
274 std::string Serialized
= llvm::to_string(Out
);
// NOTE(review): `In` is declared a second time here. No enclosing brace is
// visible in this view; if an inner scope exists, this `In` shadows the
// YAML-parsed one for every check below. BinaryConversions uses a distinct
// name (In2) for the re-parsed file.
276 auto In
= readIndexFile(Serialized
);
277 ASSERT_TRUE(bool(In
)) << In
.takeError();
278 ASSERT_TRUE(In
->Symbols
);
279 ASSERT_TRUE(In
->Refs
);
280 ASSERT_TRUE(In
->Sources
);
281 ASSERT_TRUE(In
->Sources
->count(IGN
.URI
));
282 // Assert the YAML serializations match, for nice comparisons and diffs.
// NOTE(review): both sides of each comparison below are computed from the
// same `In`, so these two checks compare a value with itself and cannot fail.
283 EXPECT_THAT(yamlFromSymbols(*In
->Symbols
),
284 UnorderedElementsAreArray(yamlFromSymbols(*In
->Symbols
)));
285 EXPECT_THAT(yamlFromRefs(*In
->Refs
),
286 UnorderedElementsAreArray(yamlFromRefs(*In
->Refs
)));
// The deserialized node must equal the hand-built one in every field.
287 auto IGNDeserialized
= In
->Sources
->lookup(IGN
.URI
);
288 EXPECT_EQ(IGNDeserialized
.Digest
, IGN
.Digest
);
289 EXPECT_EQ(IGNDeserialized
.DirectIncludes
, IGN
.DirectIncludes
);
290 EXPECT_EQ(IGNDeserialized
.URI
, IGN
.URI
);
291 EXPECT_EQ(IGNDeserialized
.Flags
, IGN
.Flags
);
// Checks which fields of a compile command survive a round trip through the
// RIFF format: Directory and CommandLine are expected to match, while
// Filename, Heuristic and Output are expected to differ from the values set
// here.
// NOTE(review): the parse result below is only EXPECT-checked and then
// dereferenced by `Out(*In)`.
295 TEST(SerializationTest
, CmdlTest
) {
296 auto In
= readIndexFile(YAML
);
297 EXPECT_TRUE(bool(In
)) << In
.takeError();
// Reference command. The three "ignored" values exist so the EXPECT_NE checks
// at the end can tell that they were not carried over.
299 tooling::CompileCommand Cmd
;
300 Cmd
.Directory
= "testdir";
301 Cmd
.CommandLine
.push_back("cmd1");
302 Cmd
.CommandLine
.push_back("cmd2");
303 Cmd
.Filename
= "ignored";
304 Cmd
.Heuristic
= "ignored";
305 Cmd
.Output
= "ignored";
// NOTE(review): no statement attaching `Cmd` to `Out` is visible in this
// view — confirm that the command being serialized is the one built above.
307 IndexFileOut
Out(*In
);
308 Out
.Format
= IndexFileFormat::RIFF
;
311 std::string Serialized
= llvm::to_string(Out
);
// NOTE(review): `In` is declared a second time here. No enclosing brace is
// visible in this view; if an inner scope exists, this `In` shadows the
// YAML-parsed one.
313 auto In
= readIndexFile(Serialized
);
314 ASSERT_TRUE(bool(In
)) << In
.takeError();
315 ASSERT_TRUE(In
->Cmd
);
// Compare the re-read command with the reference command field by field.
317 const tooling::CompileCommand
&SerializedCmd
= *In
->Cmd
;
318 EXPECT_EQ(SerializedCmd
.CommandLine
, Cmd
.CommandLine
);
319 EXPECT_EQ(SerializedCmd
.Directory
, Cmd
.Directory
);
320 EXPECT_NE(SerializedCmd
.Filename
, Cmd
.Filename
);
321 EXPECT_NE(SerializedCmd
.Heuristic
, Cmd
.Heuristic
);
322 EXPECT_NE(SerializedCmd
.Output
, Cmd
.Output
);
// ScopedMemoryLimit: helper used by the NoCrashOnBad* tests to cap the
// address space (RLIMIT_AS) for the lifetime of the object. Two definitions
// follow: one that uses getrlimit/setrlimit under the preprocessor condition
// below, and a stub that only logs.
// NOTE(review): the access specifiers, the conditions guarding the log and
// restore calls, and the #else/#endif lines are not visible in this view.
326 // rlimit is part of POSIX. RLIMIT_AS does not exist in OpenBSD.
327 // Sanitizers use a lot of address space, so we can't apply strict limits.
328 #if LLVM_ON_UNIX && defined(RLIMIT_AS) && !LLVM_ADDRESS_SANITIZER_BUILD && \
329 !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_THREAD_SANITIZER_BUILD
// State: the limit read at construction, and whether lowering it worked.
330 class ScopedMemoryLimit
{
331 struct rlimit OriginalLimit
;
332 bool Succeeded
= false;
// Reads the current RLIMIT_AS into OriginalLimit; if that works, sets a copy
// with rlim_cur = Bytes and records in Succeeded whether setrlimit returned 0.
// (getrlimit/setrlimit return 0 on success, hence the negations.)
335 ScopedMemoryLimit(rlim_t Bytes
) {
336 if (!getrlimit(RLIMIT_AS
, &OriginalLimit
)) {
337 struct rlimit NewLimit
= OriginalLimit
;
338 NewLimit
.rlim_cur
= Bytes
;
339 Succeeded
= !setrlimit(RLIMIT_AS
, &NewLimit
);
342 log("Failed to set rlimit");
// Destructor: puts the saved OriginalLimit back with setrlimit.
// NOTE(review): presumably guarded by Succeeded; the guard is not visible here.
345 ~ScopedMemoryLimit() {
347 setrlimit(RLIMIT_AS
, &OriginalLimit
);
// Stub variant: applies no limit and only logs that rlimit is unsupported.
// It takes `unsigned` where the variant above takes `rlim_t`.
351 class ScopedMemoryLimit
{
353 ScopedMemoryLimit(unsigned Bytes
) { log("rlimit unsupported"); }
357 // Test that our deserialization detects invalid array sizes without allocating.
358 // If this detection fails, the test should allocate a huge array and crash.
359 TEST(SerializationTest
, NoCrashOnBadArraySize
) {
360 // This test is tricky because we need to construct a subtly invalid file.
361 // First, create a valid serialized file.
362 auto In
= readIndexFile(YAML
);
363 ASSERT_FALSE(!In
) << In
.takeError();
364 IndexFileOut
Out(*In
);
365 Out
.Format
= IndexFileFormat::RIFF
;
366 std::string Serialized
= llvm::to_string(Out
);
368 // Low-level parse it again and find the `srcs` chunk we're going to corrupt.
369 auto Parsed
= riff::readFile(Serialized
);
370 ASSERT_FALSE(!Parsed
) << Parsed
.takeError();
371 auto Srcs
= llvm::find_if(Parsed
->Chunks
, [](riff::Chunk C
) {
372 return C
.ID
== riff::fourCC("srcs");
374 ASSERT_NE(Srcs
, Parsed
->Chunks
.end());
376 // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one.
378 // - 1 byte: flags (1)
379 // - varint(stringID): URI
380 // - 8 byte: file digest
381 // - varint: DirectIncludes.length
382 // - repeated varint(stringID): DirectIncludes
383 // We want to set DirectIncludes.length to a huge number.
384 // The offset isn't trivial to find, so we use the file digest.
385 std::string FileDigest
= llvm::fromHex("EED8F5EAF25C453C");
386 unsigned Pos
= Srcs
->Data
.find_first_of(FileDigest
);
387 ASSERT_NE(Pos
, StringRef::npos
) << "Couldn't locate file digest";
388 Pos
+= FileDigest
.size();
390 // Varints are little-endian base-128 numbers, where the top-bit of each byte
391 // indicates whether there are more. ffffffff0f -> 0xffffffff.
392 std::string CorruptSrcs
=
393 (Srcs
->Data
.take_front(Pos
) + llvm::fromHex("ffffffff0f") +
394 "some_random_garbage")
396 Srcs
->Data
= CorruptSrcs
;
398 // Try to crash rather than hang on large allocation.
399 ScopedMemoryLimit
MemLimit(1000 * 1024 * 1024); // 1GB
401 std::string CorruptFile
= llvm::to_string(*Parsed
);
402 auto CorruptParsed
= readIndexFile(CorruptFile
);
403 ASSERT_TRUE(!CorruptParsed
);
404 EXPECT_EQ(llvm::toString(CorruptParsed
.takeError()),
405 "malformed or truncated include uri");
408 // Check we detect invalid string table size size without allocating it first.
409 // If this detection fails, the test should allocate a huge array and crash.
410 TEST(SerializationTest
, NoCrashOnBadStringTableSize
) {
411 if (!llvm::compression::zlib::isAvailable()) {
412 log("skipping test, no zlib");
416 // First, create a valid serialized file.
417 auto In
= readIndexFile(YAML
);
418 ASSERT_FALSE(!In
) << In
.takeError();
419 IndexFileOut
Out(*In
);
420 Out
.Format
= IndexFileFormat::RIFF
;
421 std::string Serialized
= llvm::to_string(Out
);
423 // Low-level parse it again, we're going to replace the `stri` chunk.
424 auto Parsed
= riff::readFile(Serialized
);
425 ASSERT_FALSE(!Parsed
) << Parsed
.takeError();
426 auto Stri
= llvm::find_if(Parsed
->Chunks
, [](riff::Chunk C
) {
427 return C
.ID
== riff::fourCC("stri");
429 ASSERT_NE(Stri
, Parsed
->Chunks
.end());
431 // stri consists of an 8 byte uncompressed-size, and then compressed data.
432 // We'll claim our small amount of data expands to 4GB
433 std::string CorruptStri
=
434 (llvm::fromHex("ffffffff") + Stri
->Data
.drop_front(4)).str();
435 Stri
->Data
= CorruptStri
;
436 std::string FileDigest
= llvm::fromHex("EED8F5EAF25C453C");
438 // Try to crash rather than hang on large allocation.
439 ScopedMemoryLimit
MemLimit(1000 * 1024 * 1024); // 1GB
441 std::string CorruptFile
= llvm::to_string(*Parsed
);
442 auto CorruptParsed
= readIndexFile(CorruptFile
);
443 ASSERT_TRUE(!CorruptParsed
);
444 EXPECT_THAT(llvm::toString(CorruptParsed
.takeError()),
445 testing::HasSubstr("bytes is implausible"));
449 } // namespace clangd