gn build: Merge r374476
[llvm-complete.git] / tools / sancov / sancov.cpp
blob9645183c2bc3652d8e226f360f04f47624125d63
1 //===-- sancov.cpp --------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file is a command-line tool for reading and analyzing sanitizer
9 // coverage.
10 //===----------------------------------------------------------------------===//
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringExtras.h"
13 #include "llvm/ADT/Twine.h"
14 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstrAnalysis.h"
20 #include "llvm/MC/MCInstrInfo.h"
21 #include "llvm/MC/MCObjectFileInfo.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCSubtargetInfo.h"
24 #include "llvm/Object/Archive.h"
25 #include "llvm/Object/Binary.h"
26 #include "llvm/Object/COFF.h"
27 #include "llvm/Object/MachO.h"
28 #include "llvm/Object/ObjectFile.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Errc.h"
32 #include "llvm/Support/ErrorOr.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/Support/MD5.h"
35 #include "llvm/Support/ManagedStatic.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/PrettyStackTrace.h"
39 #include "llvm/Support/Regex.h"
40 #include "llvm/Support/SHA1.h"
41 #include "llvm/Support/Signals.h"
42 #include "llvm/Support/SourceMgr.h"
43 #include "llvm/Support/SpecialCaseList.h"
44 #include "llvm/Support/TargetRegistry.h"
45 #include "llvm/Support/TargetSelect.h"
46 #include "llvm/Support/YAMLParser.h"
47 #include "llvm/Support/raw_ostream.h"
49 #include <set>
50 #include <vector>
52 using namespace llvm;
54 namespace {
56 // --------- COMMAND LINE FLAGS ---------
// The operation sancov is asked to perform. Each enumerator is bound to a
// command-line spelling by the `Action` option.
enum ActionType {
  CoveredFunctionsAction,    // "covered-functions"
  HtmlReportAction,          // "html-report" (described as REMOVED)
  MergeAction,               // "merge"
  NotCoveredFunctionsAction, // "not-covered-functions"
  PrintAction,               // "print"
  PrintCovPointsAction,      // "print-coverage-pcs"
  StatsAction,               // "print-coverage-stats"
  SymbolizeAction            // "symbolize"
};
69 cl::opt<ActionType> Action(
70 cl::desc("Action (required)"), cl::Required,
71 cl::values(
72 clEnumValN(PrintAction, "print", "Print coverage addresses"),
73 clEnumValN(PrintCovPointsAction, "print-coverage-pcs",
74 "Print coverage instrumentation points addresses."),
75 clEnumValN(CoveredFunctionsAction, "covered-functions",
76 "Print all covered funcions."),
77 clEnumValN(NotCoveredFunctionsAction, "not-covered-functions",
78 "Print all not covered funcions."),
79 clEnumValN(StatsAction, "print-coverage-stats",
80 "Print coverage statistics."),
81 clEnumValN(HtmlReportAction, "html-report",
82 "REMOVED. Use -symbolize & coverage-report-server.py."),
83 clEnumValN(SymbolizeAction, "symbolize",
84 "Produces a symbolized JSON report from binary report."),
85 clEnumValN(MergeAction, "merge", "Merges reports.")));
87 static cl::list<std::string>
88 ClInputFiles(cl::Positional, cl::OneOrMore,
89 cl::desc("<action> <binary files...> <.sancov files...> "
90 "<.symcov files...>"));
92 static cl::opt<bool> ClDemangle("demangle", cl::init(true),
93 cl::desc("Print demangled function name."));
95 static cl::opt<bool>
96 ClSkipDeadFiles("skip-dead-files", cl::init(true),
97 cl::desc("Do not list dead source files in reports."));
99 static cl::opt<std::string> ClStripPathPrefix(
100 "strip_path_prefix", cl::init(""),
101 cl::desc("Strip this prefix from file paths in reports."));
103 static cl::opt<std::string>
104 ClBlacklist("blacklist", cl::init(""),
105 cl::desc("Blacklist file (sanitizer blacklist format)."));
107 static cl::opt<bool> ClUseDefaultBlacklist(
108 "use_default_blacklist", cl::init(true), cl::Hidden,
109 cl::desc("Controls if default blacklist should be used."));
// Built-in blacklist text, one rule per line: the sanitizer's own functions,
// anything under /usr/include, and anything under a libc++ directory.
static const char *const DefaultBlacklistStr =
    "fun:__sanitizer_.*\n"
    "src:/usr/include/.*\n"
    "src:.*/libc\\+\\+/.*\n";
115 // --------- FORMAT SPECIFICATION ---------
// Fixed 8-byte header at the start of a binary .sancov file. The first word
// is the bitness tag and the second is the magic value.
struct FileHeader {
  uint32_t Bitness; // Compared against Bitness32 / Bitness64.
  uint32_t Magic;   // Compared against BinCoverageMagic.
};
// Expected FileHeader::Magic value of a binary coverage file.
static constexpr uint32_t BinCoverageMagic = 0xC0BFFFFF;
// FileHeader::Bitness value for files holding 32-bit addresses.
static constexpr uint32_t Bitness32 = 0xFFFFFF32;
// FileHeader::Bitness value for files holding 64-bit addresses.
static constexpr uint32_t Bitness64 = 0xFFFFFF64;
126 static const Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov");
127 static const Regex SymcovFileRegex(".*\\.symcov");
129 // --------- MAIN DATASTRUCTURES ----------
131 // Contents of .sancov file: list of coverage point addresses that were
132 // executed.
133 struct RawCoverage {
134 explicit RawCoverage(std::unique_ptr<std::set<uint64_t>> Addrs)
135 : Addrs(std::move(Addrs)) {}
137 // Read binary .sancov file.
138 static ErrorOr<std::unique_ptr<RawCoverage>>
139 read(const std::string &FileName);
141 std::unique_ptr<std::set<uint64_t>> Addrs;
144 // Coverage point has an opaque Id and corresponds to multiple source locations.
145 struct CoveragePoint {
146 explicit CoveragePoint(const std::string &Id) : Id(Id) {}
148 std::string Id;
149 SmallVector<DILineInfo, 1> Locs;
152 // Symcov file content: set of covered Ids plus information about all available
153 // coverage points.
154 struct SymbolizedCoverage {
155 // Read json .symcov file.
156 static std::unique_ptr<SymbolizedCoverage> read(const std::string &InputFile);
158 std::set<std::string> CoveredIds;
159 std::string BinaryHash;
160 std::vector<CoveragePoint> Points;
// Summary counters printed by the stats output. Field order matters because
// the struct is aggregate-initialized.
struct CoverageStats {
  size_t AllPoints; // Printed as "all-edges".
  size_t CovPoints; // Printed as "cov-edges".
  size_t AllFns;    // Printed as "all-functions".
  size_t CovFns;    // Printed as "cov-functions".
};
170 // --------- ERROR HANDLING ---------
172 static void fail(const llvm::Twine &E) {
173 errs() << "ERROR: " << E << "\n";
174 exit(1);
177 static void failIf(bool B, const llvm::Twine &E) {
178 if (B)
179 fail(E);
182 static void failIfError(std::error_code Error) {
183 if (!Error)
184 return;
185 errs() << "ERROR: " << Error.message() << "(" << Error.value() << ")\n";
186 exit(1);
189 template <typename T> static void failIfError(const ErrorOr<T> &E) {
190 failIfError(E.getError());
193 static void failIfError(Error Err) {
194 if (Err) {
195 logAllUnhandledErrors(std::move(Err), errs(), "ERROR: ");
196 exit(1);
200 template <typename T> static void failIfError(Expected<T> &E) {
201 failIfError(E.takeError());
204 static void failIfNotEmpty(const llvm::Twine &E) {
205 if (E.str().empty())
206 return;
207 fail(E);
210 template <typename T>
211 static void failIfEmpty(const std::unique_ptr<T> &Ptr,
212 const std::string &Message) {
213 if (Ptr.get())
214 return;
215 fail(Message);
218 // ----------- Coverage I/O ----------
// Decodes the byte range [Start, End) as consecutive host-endian integers of
// type T and inserts each one, widened to uint64_t, into *Ints.
//
// Each element is copied out byte by byte, so the range does not have to be
// aligned for T. A trailing fragment shorter than sizeof(T) is ignored, and an
// empty or inverted range inserts nothing.
template <typename T>
static void readInts(const char *Start, const char *End,
                     std::set<uint64_t> *Ints) {
  if (End <= Start)
    return;
  const size_t Count = static_cast<size_t>(End - Start) / sizeof(T);
  for (size_t I = 0; I < Count; ++I) {
    const char *Src = Start + I * sizeof(T);
    T Value;
    std::copy(Src, Src + sizeof(T), reinterpret_cast<char *>(&Value));
    Ints->insert(Ints->end(), Value);
  }
}
227 ErrorOr<std::unique_ptr<RawCoverage>>
228 RawCoverage::read(const std::string &FileName) {
229 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
230 MemoryBuffer::getFile(FileName);
231 if (!BufOrErr)
232 return BufOrErr.getError();
233 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
234 if (Buf->getBufferSize() < 8) {
235 errs() << "File too small (<8): " << Buf->getBufferSize() << '\n';
236 return make_error_code(errc::illegal_byte_sequence);
238 const FileHeader *Header =
239 reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
241 if (Header->Magic != BinCoverageMagic) {
242 errs() << "Wrong magic: " << Header->Magic << '\n';
243 return make_error_code(errc::illegal_byte_sequence);
246 auto Addrs = std::make_unique<std::set<uint64_t>>();
248 switch (Header->Bitness) {
249 case Bitness64:
250 readInts<uint64_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
251 Addrs.get());
252 break;
253 case Bitness32:
254 readInts<uint32_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
255 Addrs.get());
256 break;
257 default:
258 errs() << "Unsupported bitness: " << Header->Bitness << '\n';
259 return make_error_code(errc::illegal_byte_sequence);
262 // Ignore slots that are zero, so a runtime implementation is not required
263 // to compactify the data.
264 Addrs->erase(0);
266 return std::unique_ptr<RawCoverage>(new RawCoverage(std::move(Addrs)));
269 // Print coverage addresses.
270 raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) {
271 for (auto Addr : *CoverageData.Addrs) {
272 OS << "0x";
273 OS.write_hex(Addr);
274 OS << "\n";
276 return OS;
279 static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) {
280 OS << "all-edges: " << Stats.AllPoints << "\n";
281 OS << "cov-edges: " << Stats.CovPoints << "\n";
282 OS << "all-functions: " << Stats.AllFns << "\n";
283 OS << "cov-functions: " << Stats.CovFns << "\n";
284 return OS;
287 // Helper for writing out JSON. Handles indents and commas using
288 // scope variables for objects and arrays.
289 class JSONWriter {
290 public:
291 JSONWriter(raw_ostream &Out) : OS(Out) {}
292 JSONWriter(const JSONWriter &) = delete;
293 ~JSONWriter() { OS << "\n"; }
295 void operator<<(StringRef S) { printJSONStringLiteral(S, OS); }
297 // Helper RAII class to output JSON objects.
298 class Object {
299 public:
300 Object(JSONWriter *W, raw_ostream &OS) : W(W), OS(OS) {
301 OS << "{";
302 W->Indent++;
304 ~Object() {
305 W->Indent--;
306 OS << "\n";
307 W->indent();
308 OS << "}";
311 void key(StringRef Key) {
312 Index++;
313 if (Index > 0)
314 OS << ",";
315 OS << "\n";
316 W->indent();
317 printJSONStringLiteral(Key, OS);
318 OS << " : ";
321 private:
322 JSONWriter *W;
323 raw_ostream &OS;
324 int Index = -1;
327 Object object() { return {this, OS}; }
329 // Helper RAII class to output JSON arrays.
330 class Array {
331 public:
332 Array(raw_ostream &OS) : OS(OS) { OS << "["; }
333 ~Array() { OS << "]"; }
334 void next() {
335 Index++;
336 if (Index > 0)
337 OS << ", ";
340 private:
341 raw_ostream &OS;
342 int Index = -1;
345 Array array() { return {OS}; }
347 private:
348 void indent() { OS.indent(Indent * 2); }
350 static void printJSONStringLiteral(StringRef S, raw_ostream &OS) {
351 if (S.find('"') == std::string::npos) {
352 OS << "\"" << S << "\"";
353 return;
355 OS << "\"";
356 for (char Ch : S.bytes()) {
357 if (Ch == '"')
358 OS << "\\";
359 OS << Ch;
361 OS << "\"";
364 raw_ostream &OS;
365 int Indent = 0;
368 // Output symbolized information for coverage points in JSON.
369 // Format:
370 // {
371 // '<file_name>' : {
372 // '<function_name>' : {
373 // '<point_id'> : '<line_number>:'<column_number'.
374 // ....
375 // }
376 // }
377 // }
378 static void operator<<(JSONWriter &W,
379 const std::vector<CoveragePoint> &Points) {
380 // Group points by file.
381 auto ByFile(W.object());
382 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFile;
383 for (const auto &Point : Points) {
384 for (const DILineInfo &Loc : Point.Locs) {
385 PointsByFile[Loc.FileName].push_back(&Point);
389 for (const auto &P : PointsByFile) {
390 std::string FileName = P.first;
391 ByFile.key(FileName);
393 // Group points by function.
394 auto ByFn(W.object());
395 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFn;
396 for (auto PointPtr : P.second) {
397 for (const DILineInfo &Loc : PointPtr->Locs) {
398 PointsByFn[Loc.FunctionName].push_back(PointPtr);
402 for (const auto &P : PointsByFn) {
403 std::string FunctionName = P.first;
404 std::set<std::string> WrittenIds;
406 ByFn.key(FunctionName);
408 // Output <point_id> : "<line>:<col>".
409 auto ById(W.object());
410 for (const CoveragePoint *Point : P.second) {
411 for (const auto &Loc : Point->Locs) {
412 if (Loc.FileName != FileName || Loc.FunctionName != FunctionName)
413 continue;
414 if (WrittenIds.find(Point->Id) != WrittenIds.end())
415 continue;
417 WrittenIds.insert(Point->Id);
418 ById.key(Point->Id);
419 W << (utostr(Loc.Line) + ":" + utostr(Loc.Column));
426 static void operator<<(JSONWriter &W, const SymbolizedCoverage &C) {
427 auto O(W.object());
430 O.key("covered-points");
431 auto PointsArray(W.array());
433 for (const std::string &P : C.CoveredIds) {
434 PointsArray.next();
435 W << P;
440 if (!C.BinaryHash.empty()) {
441 O.key("binary-hash");
442 W << C.BinaryHash;
447 O.key("point-symbol-info");
448 W << C.Points;
452 static std::string parseScalarString(yaml::Node *N) {
453 SmallString<64> StringStorage;
454 yaml::ScalarNode *S = dyn_cast<yaml::ScalarNode>(N);
455 failIf(!S, "expected string");
456 return S->getValue(StringStorage);
459 std::unique_ptr<SymbolizedCoverage>
460 SymbolizedCoverage::read(const std::string &InputFile) {
461 auto Coverage(std::make_unique<SymbolizedCoverage>());
463 std::map<std::string, CoveragePoint> Points;
464 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
465 MemoryBuffer::getFile(InputFile);
466 failIfError(BufOrErr);
468 SourceMgr SM;
469 yaml::Stream S(**BufOrErr, SM);
471 yaml::document_iterator DI = S.begin();
472 failIf(DI == S.end(), "empty document: " + InputFile);
473 yaml::Node *Root = DI->getRoot();
474 failIf(!Root, "expecting root node: " + InputFile);
475 yaml::MappingNode *Top = dyn_cast<yaml::MappingNode>(Root);
476 failIf(!Top, "expecting mapping node: " + InputFile);
478 for (auto &KVNode : *Top) {
479 auto Key = parseScalarString(KVNode.getKey());
481 if (Key == "covered-points") {
482 yaml::SequenceNode *Points =
483 dyn_cast<yaml::SequenceNode>(KVNode.getValue());
484 failIf(!Points, "expected array: " + InputFile);
486 for (auto I = Points->begin(), E = Points->end(); I != E; ++I) {
487 Coverage->CoveredIds.insert(parseScalarString(&*I));
489 } else if (Key == "binary-hash") {
490 Coverage->BinaryHash = parseScalarString(KVNode.getValue());
491 } else if (Key == "point-symbol-info") {
492 yaml::MappingNode *PointSymbolInfo =
493 dyn_cast<yaml::MappingNode>(KVNode.getValue());
494 failIf(!PointSymbolInfo, "expected mapping node: " + InputFile);
496 for (auto &FileKVNode : *PointSymbolInfo) {
497 auto Filename = parseScalarString(FileKVNode.getKey());
499 yaml::MappingNode *FileInfo =
500 dyn_cast<yaml::MappingNode>(FileKVNode.getValue());
501 failIf(!FileInfo, "expected mapping node: " + InputFile);
503 for (auto &FunctionKVNode : *FileInfo) {
504 auto FunctionName = parseScalarString(FunctionKVNode.getKey());
506 yaml::MappingNode *FunctionInfo =
507 dyn_cast<yaml::MappingNode>(FunctionKVNode.getValue());
508 failIf(!FunctionInfo, "expected mapping node: " + InputFile);
510 for (auto &PointKVNode : *FunctionInfo) {
511 auto PointId = parseScalarString(PointKVNode.getKey());
512 auto Loc = parseScalarString(PointKVNode.getValue());
514 size_t ColonPos = Loc.find(':');
515 failIf(ColonPos == std::string::npos, "expected ':': " + InputFile);
517 auto LineStr = Loc.substr(0, ColonPos);
518 auto ColStr = Loc.substr(ColonPos + 1, Loc.size());
520 if (Points.find(PointId) == Points.end())
521 Points.insert(std::make_pair(PointId, CoveragePoint(PointId)));
523 DILineInfo LineInfo;
524 LineInfo.FileName = Filename;
525 LineInfo.FunctionName = FunctionName;
526 char *End;
527 LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10);
528 LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10);
530 CoveragePoint *CoveragePoint = &Points.find(PointId)->second;
531 CoveragePoint->Locs.push_back(LineInfo);
535 } else {
536 errs() << "Ignoring unknown key: " << Key << "\n";
540 for (auto &KV : Points) {
541 Coverage->Points.push_back(KV.second);
544 return Coverage;
547 // ---------- MAIN FUNCTIONALITY ----------
549 std::string stripPathPrefix(std::string Path) {
550 if (ClStripPathPrefix.empty())
551 return Path;
552 size_t Pos = Path.find(ClStripPathPrefix);
553 if (Pos == std::string::npos)
554 return Path;
555 return Path.substr(Pos + ClStripPathPrefix.size());
558 static std::unique_ptr<symbolize::LLVMSymbolizer> createSymbolizer() {
559 symbolize::LLVMSymbolizer::Options SymbolizerOptions;
560 SymbolizerOptions.Demangle = ClDemangle;
561 SymbolizerOptions.UseSymbolTable = true;
562 return std::unique_ptr<symbolize::LLVMSymbolizer>(
563 new symbolize::LLVMSymbolizer(SymbolizerOptions));
566 static std::string normalizeFilename(const std::string &FileName) {
567 SmallString<256> S(FileName);
568 sys::path::remove_dots(S, /* remove_dot_dot */ true);
569 return stripPathPrefix(S.str().str());
572 class Blacklists {
573 public:
574 Blacklists()
575 : DefaultBlacklist(createDefaultBlacklist()),
576 UserBlacklist(createUserBlacklist()) {}
578 bool isBlacklisted(const DILineInfo &I) {
579 if (DefaultBlacklist &&
580 DefaultBlacklist->inSection("sancov", "fun", I.FunctionName))
581 return true;
582 if (DefaultBlacklist &&
583 DefaultBlacklist->inSection("sancov", "src", I.FileName))
584 return true;
585 if (UserBlacklist &&
586 UserBlacklist->inSection("sancov", "fun", I.FunctionName))
587 return true;
588 if (UserBlacklist && UserBlacklist->inSection("sancov", "src", I.FileName))
589 return true;
590 return false;
593 private:
594 static std::unique_ptr<SpecialCaseList> createDefaultBlacklist() {
595 if (!ClUseDefaultBlacklist)
596 return std::unique_ptr<SpecialCaseList>();
597 std::unique_ptr<MemoryBuffer> MB =
598 MemoryBuffer::getMemBuffer(DefaultBlacklistStr);
599 std::string Error;
600 auto Blacklist = SpecialCaseList::create(MB.get(), Error);
601 failIfNotEmpty(Error);
602 return Blacklist;
605 static std::unique_ptr<SpecialCaseList> createUserBlacklist() {
606 if (ClBlacklist.empty())
607 return std::unique_ptr<SpecialCaseList>();
609 return SpecialCaseList::createOrDie({{ClBlacklist}});
611 std::unique_ptr<SpecialCaseList> DefaultBlacklist;
612 std::unique_ptr<SpecialCaseList> UserBlacklist;
615 static std::vector<CoveragePoint>
616 getCoveragePoints(const std::string &ObjectFile,
617 const std::set<uint64_t> &Addrs,
618 const std::set<uint64_t> &CoveredAddrs) {
619 std::vector<CoveragePoint> Result;
620 auto Symbolizer(createSymbolizer());
621 Blacklists B;
623 std::set<std::string> CoveredFiles;
624 if (ClSkipDeadFiles) {
625 for (auto Addr : CoveredAddrs) {
626 // TODO: it would be neccessary to set proper section index here.
627 // object::SectionedAddress::UndefSection works for only absolute
628 // addresses.
629 object::SectionedAddress ModuleAddress = {
630 Addr, object::SectionedAddress::UndefSection};
632 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress);
633 failIfError(LineInfo);
634 CoveredFiles.insert(LineInfo->FileName);
635 auto InliningInfo =
636 Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress);
637 failIfError(InliningInfo);
638 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
639 auto FrameInfo = InliningInfo->getFrame(I);
640 CoveredFiles.insert(FrameInfo.FileName);
645 for (auto Addr : Addrs) {
646 std::set<DILineInfo> Infos; // deduplicate debug info.
648 // TODO: it would be neccessary to set proper section index here.
649 // object::SectionedAddress::UndefSection works for only absolute addresses.
650 object::SectionedAddress ModuleAddress = {
651 Addr, object::SectionedAddress::UndefSection};
653 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress);
654 failIfError(LineInfo);
655 if (ClSkipDeadFiles &&
656 CoveredFiles.find(LineInfo->FileName) == CoveredFiles.end())
657 continue;
658 LineInfo->FileName = normalizeFilename(LineInfo->FileName);
659 if (B.isBlacklisted(*LineInfo))
660 continue;
662 auto Id = utohexstr(Addr, true);
663 auto Point = CoveragePoint(Id);
664 Infos.insert(*LineInfo);
665 Point.Locs.push_back(*LineInfo);
667 auto InliningInfo =
668 Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress);
669 failIfError(InliningInfo);
670 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
671 auto FrameInfo = InliningInfo->getFrame(I);
672 if (ClSkipDeadFiles &&
673 CoveredFiles.find(FrameInfo.FileName) == CoveredFiles.end())
674 continue;
675 FrameInfo.FileName = normalizeFilename(FrameInfo.FileName);
676 if (B.isBlacklisted(FrameInfo))
677 continue;
678 if (Infos.find(FrameInfo) == Infos.end()) {
679 Infos.insert(FrameInfo);
680 Point.Locs.push_back(FrameInfo);
684 Result.push_back(Point);
687 return Result;
690 static bool isCoveragePointSymbol(StringRef Name) {
691 return Name == "__sanitizer_cov" || Name == "__sanitizer_cov_with_check" ||
692 Name == "__sanitizer_cov_trace_func_enter" ||
693 Name == "__sanitizer_cov_trace_pc_guard" ||
694 // Mac has '___' prefix
695 Name == "___sanitizer_cov" || Name == "___sanitizer_cov_with_check" ||
696 Name == "___sanitizer_cov_trace_func_enter" ||
697 Name == "___sanitizer_cov_trace_pc_guard";
700 // Locate __sanitizer_cov* function addresses inside the stubs table on MachO.
701 static void findMachOIndirectCovFunctions(const object::MachOObjectFile &O,
702 std::set<uint64_t> *Result) {
703 MachO::dysymtab_command Dysymtab = O.getDysymtabLoadCommand();
704 MachO::symtab_command Symtab = O.getSymtabLoadCommand();
706 for (const auto &Load : O.load_commands()) {
707 if (Load.C.cmd == MachO::LC_SEGMENT_64) {
708 MachO::segment_command_64 Seg = O.getSegment64LoadCommand(Load);
709 for (unsigned J = 0; J < Seg.nsects; ++J) {
710 MachO::section_64 Sec = O.getSection64(Load, J);
712 uint32_t SectionType = Sec.flags & MachO::SECTION_TYPE;
713 if (SectionType == MachO::S_SYMBOL_STUBS) {
714 uint32_t Stride = Sec.reserved2;
715 uint32_t Cnt = Sec.size / Stride;
716 uint32_t N = Sec.reserved1;
717 for (uint32_t J = 0; J < Cnt && N + J < Dysymtab.nindirectsyms; J++) {
718 uint32_t IndirectSymbol =
719 O.getIndirectSymbolTableEntry(Dysymtab, N + J);
720 uint64_t Addr = Sec.addr + J * Stride;
721 if (IndirectSymbol < Symtab.nsyms) {
722 object::SymbolRef Symbol = *(O.getSymbolByIndex(IndirectSymbol));
723 Expected<StringRef> Name = Symbol.getName();
724 failIfError(Name);
725 if (isCoveragePointSymbol(Name.get())) {
726 Result->insert(Addr);
733 if (Load.C.cmd == MachO::LC_SEGMENT) {
734 errs() << "ERROR: 32 bit MachO binaries not supported\n";
739 // Locate __sanitizer_cov* function addresses that are used for coverage
740 // reporting.
741 static std::set<uint64_t>
742 findSanitizerCovFunctions(const object::ObjectFile &O) {
743 std::set<uint64_t> Result;
745 for (const object::SymbolRef &Symbol : O.symbols()) {
746 Expected<uint64_t> AddressOrErr = Symbol.getAddress();
747 failIfError(AddressOrErr);
748 uint64_t Address = AddressOrErr.get();
750 Expected<StringRef> NameOrErr = Symbol.getName();
751 failIfError(NameOrErr);
752 StringRef Name = NameOrErr.get();
754 if (!(Symbol.getFlags() & object::BasicSymbolRef::SF_Undefined) &&
755 isCoveragePointSymbol(Name)) {
756 Result.insert(Address);
760 if (const auto *CO = dyn_cast<object::COFFObjectFile>(&O)) {
761 for (const object::ExportDirectoryEntryRef &Export :
762 CO->export_directories()) {
763 uint32_t RVA;
764 std::error_code EC = Export.getExportRVA(RVA);
765 failIfError(EC);
767 StringRef Name;
768 EC = Export.getSymbolName(Name);
769 failIfError(EC);
771 if (isCoveragePointSymbol(Name))
772 Result.insert(CO->getImageBase() + RVA);
776 if (const auto *MO = dyn_cast<object::MachOObjectFile>(&O)) {
777 findMachOIndirectCovFunctions(*MO, &Result);
780 return Result;
783 static uint64_t getPreviousInstructionPc(uint64_t PC,
784 Triple TheTriple) {
785 if (TheTriple.isARM()) {
786 return (PC - 3) & (~1);
787 } else if (TheTriple.isAArch64()) {
788 return PC - 4;
789 } else if (TheTriple.isMIPS()) {
790 return PC - 8;
791 } else {
792 return PC - 1;
796 // Locate addresses of all coverage points in a file. Coverage point
797 // is defined as the 'address of instruction following __sanitizer_cov
798 // call - 1'.
799 static void getObjectCoveragePoints(const object::ObjectFile &O,
800 std::set<uint64_t> *Addrs) {
801 Triple TheTriple("unknown-unknown-unknown");
802 TheTriple.setArch(Triple::ArchType(O.getArch()));
803 auto TripleName = TheTriple.getTriple();
805 std::string Error;
806 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
807 failIfNotEmpty(Error);
809 std::unique_ptr<const MCSubtargetInfo> STI(
810 TheTarget->createMCSubtargetInfo(TripleName, "", ""));
811 failIfEmpty(STI, "no subtarget info for target " + TripleName);
813 std::unique_ptr<const MCRegisterInfo> MRI(
814 TheTarget->createMCRegInfo(TripleName));
815 failIfEmpty(MRI, "no register info for target " + TripleName);
817 std::unique_ptr<const MCAsmInfo> AsmInfo(
818 TheTarget->createMCAsmInfo(*MRI, TripleName));
819 failIfEmpty(AsmInfo, "no asm info for target " + TripleName);
821 std::unique_ptr<const MCObjectFileInfo> MOFI(new MCObjectFileInfo);
822 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
823 std::unique_ptr<MCDisassembler> DisAsm(
824 TheTarget->createMCDisassembler(*STI, Ctx));
825 failIfEmpty(DisAsm, "no disassembler info for target " + TripleName);
827 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
828 failIfEmpty(MII, "no instruction info for target " + TripleName);
830 std::unique_ptr<const MCInstrAnalysis> MIA(
831 TheTarget->createMCInstrAnalysis(MII.get()));
832 failIfEmpty(MIA, "no instruction analysis info for target " + TripleName);
834 auto SanCovAddrs = findSanitizerCovFunctions(O);
835 if (SanCovAddrs.empty())
836 fail("__sanitizer_cov* functions not found");
838 for (object::SectionRef Section : O.sections()) {
839 if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same.
840 continue;
841 uint64_t SectionAddr = Section.getAddress();
842 uint64_t SectSize = Section.getSize();
843 if (!SectSize)
844 continue;
846 Expected<StringRef> BytesStr = Section.getContents();
847 failIfError(BytesStr);
848 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*BytesStr);
850 for (uint64_t Index = 0, Size = 0; Index < Section.getSize();
851 Index += Size) {
852 MCInst Inst;
853 if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
854 SectionAddr + Index, nulls(), nulls())) {
855 if (Size == 0)
856 Size = 1;
857 continue;
859 uint64_t Addr = Index + SectionAddr;
860 // Sanitizer coverage uses the address of the next instruction - 1.
861 uint64_t CovPoint = getPreviousInstructionPc(Addr + Size, TheTriple);
862 uint64_t Target;
863 if (MIA->isCall(Inst) &&
864 MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) &&
865 SanCovAddrs.find(Target) != SanCovAddrs.end())
866 Addrs->insert(CovPoint);
871 static void
872 visitObjectFiles(const object::Archive &A,
873 function_ref<void(const object::ObjectFile &)> Fn) {
874 Error Err = Error::success();
875 for (auto &C : A.children(Err)) {
876 Expected<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary();
877 failIfError(ChildOrErr);
878 if (auto *O = dyn_cast<object::ObjectFile>(&*ChildOrErr.get()))
879 Fn(*O);
880 else
881 failIfError(object::object_error::invalid_file_type);
883 failIfError(std::move(Err));
886 static void
887 visitObjectFiles(const std::string &FileName,
888 function_ref<void(const object::ObjectFile &)> Fn) {
889 Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
890 object::createBinary(FileName);
891 if (!BinaryOrErr)
892 failIfError(BinaryOrErr);
894 object::Binary &Binary = *BinaryOrErr.get().getBinary();
895 if (object::Archive *A = dyn_cast<object::Archive>(&Binary))
896 visitObjectFiles(*A, Fn);
897 else if (object::ObjectFile *O = dyn_cast<object::ObjectFile>(&Binary))
898 Fn(*O);
899 else
900 failIfError(object::object_error::invalid_file_type);
903 static std::set<uint64_t>
904 findSanitizerCovFunctions(const std::string &FileName) {
905 std::set<uint64_t> Result;
906 visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
907 auto Addrs = findSanitizerCovFunctions(O);
908 Result.insert(Addrs.begin(), Addrs.end());
910 return Result;
913 // Locate addresses of all coverage points in a file. Coverage point
914 // is defined as the 'address of instruction following __sanitizer_cov
915 // call - 1'.
916 static std::set<uint64_t> findCoveragePointAddrs(const std::string &FileName) {
917 std::set<uint64_t> Result;
918 visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
919 getObjectCoveragePoints(O, &Result);
921 return Result;
924 static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) {
925 for (uint64_t Addr : findCoveragePointAddrs(ObjFile)) {
926 OS << "0x";
927 OS.write_hex(Addr);
928 OS << "\n";
932 static ErrorOr<bool> isCoverageFile(const std::string &FileName) {
933 auto ShortFileName = llvm::sys::path::filename(FileName);
934 if (!SancovFileRegex.match(ShortFileName))
935 return false;
937 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
938 MemoryBuffer::getFile(FileName);
939 if (!BufOrErr) {
940 errs() << "Warning: " << BufOrErr.getError().message() << "("
941 << BufOrErr.getError().value()
942 << "), filename: " << llvm::sys::path::filename(FileName) << "\n";
943 return BufOrErr.getError();
945 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
946 if (Buf->getBufferSize() < 8) {
947 return false;
949 const FileHeader *Header =
950 reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
951 return Header->Magic == BinCoverageMagic;
954 static bool isSymbolizedCoverageFile(const std::string &FileName) {
955 auto ShortFileName = llvm::sys::path::filename(FileName);
956 return SymcovFileRegex.match(ShortFileName);
959 static std::unique_ptr<SymbolizedCoverage>
960 symbolize(const RawCoverage &Data, const std::string ObjectFile) {
961 auto Coverage = std::make_unique<SymbolizedCoverage>();
963 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
964 MemoryBuffer::getFile(ObjectFile);
965 failIfError(BufOrErr);
966 SHA1 Hasher;
967 Hasher.update((*BufOrErr)->getBuffer());
968 Coverage->BinaryHash = toHex(Hasher.final());
970 Blacklists B;
971 auto Symbolizer(createSymbolizer());
973 for (uint64_t Addr : *Data.Addrs) {
974 // TODO: it would be neccessary to set proper section index here.
975 // object::SectionedAddress::UndefSection works for only absolute addresses.
976 auto LineInfo = Symbolizer->symbolizeCode(
977 ObjectFile, {Addr, object::SectionedAddress::UndefSection});
978 failIfError(LineInfo);
979 if (B.isBlacklisted(*LineInfo))
980 continue;
982 Coverage->CoveredIds.insert(utohexstr(Addr, true));
985 std::set<uint64_t> AllAddrs = findCoveragePointAddrs(ObjectFile);
986 if (!std::includes(AllAddrs.begin(), AllAddrs.end(), Data.Addrs->begin(),
987 Data.Addrs->end())) {
988 fail("Coverage points in binary and .sancov file do not match.");
990 Coverage->Points = getCoveragePoints(ObjectFile, AllAddrs, *Data.Addrs);
991 return Coverage;
// Identifies a function by the file it is in plus its name. Ordered by file
// name first and function name second so it can be used as a set/map key.
struct FileFn {
  bool operator<(const FileFn &RHS) const {
    if (FileName != RHS.FileName)
      return FileName < RHS.FileName;
    return FunctionName < RHS.FunctionName;
  }

  std::string FileName;
  std::string FunctionName;
};
1004 static std::set<FileFn>
1005 computeFunctions(const std::vector<CoveragePoint> &Points) {
1006 std::set<FileFn> Fns;
1007 for (const auto &Point : Points) {
1008 for (const auto &Loc : Point.Locs) {
1009 Fns.insert(FileFn{Loc.FileName, Loc.FunctionName});
1012 return Fns;
1015 static std::set<FileFn>
1016 computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) {
1017 auto Fns = computeFunctions(Coverage.Points);
1019 for (const auto &Point : Coverage.Points) {
1020 if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
1021 continue;
1023 for (const auto &Loc : Point.Locs) {
1024 Fns.erase(FileFn{Loc.FileName, Loc.FunctionName});
1028 return Fns;
1031 static std::set<FileFn>
1032 computeCoveredFunctions(const SymbolizedCoverage &Coverage) {
1033 auto AllFns = computeFunctions(Coverage.Points);
1034 std::set<FileFn> Result;
1036 for (const auto &Point : Coverage.Points) {
1037 if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
1038 continue;
1040 for (const auto &Loc : Point.Locs) {
1041 Result.insert(FileFn{Loc.FileName, Loc.FunctionName});
1045 return Result;
1048 typedef std::map<FileFn, std::pair<uint32_t, uint32_t>> FunctionLocs;
1049 // finds first location in a file for each function.
1050 static FunctionLocs resolveFunctions(const SymbolizedCoverage &Coverage,
1051 const std::set<FileFn> &Fns) {
1052 FunctionLocs Result;
1053 for (const auto &Point : Coverage.Points) {
1054 for (const auto &Loc : Point.Locs) {
1055 FileFn Fn = FileFn{Loc.FileName, Loc.FunctionName};
1056 if (Fns.find(Fn) == Fns.end())
1057 continue;
1059 auto P = std::make_pair(Loc.Line, Loc.Column);
1060 auto I = Result.find(Fn);
1061 if (I == Result.end() || I->second > P) {
1062 Result[Fn] = P;
1066 return Result;
1069 static void printFunctionLocs(const FunctionLocs &FnLocs, raw_ostream &OS) {
1070 for (const auto &P : FnLocs) {
1071 OS << stripPathPrefix(P.first.FileName) << ":" << P.second.first << " "
1072 << P.first.FunctionName << "\n";
1075 CoverageStats computeStats(const SymbolizedCoverage &Coverage) {
1076 CoverageStats Stats = {Coverage.Points.size(), Coverage.CoveredIds.size(),
1077 computeFunctions(Coverage.Points).size(),
1078 computeCoveredFunctions(Coverage).size()};
1079 return Stats;
1082 // Print list of covered functions.
1083 // Line format: <file_name>:<line> <function_name>
1084 static void printCoveredFunctions(const SymbolizedCoverage &CovData,
1085 raw_ostream &OS) {
1086 auto CoveredFns = computeCoveredFunctions(CovData);
1087 printFunctionLocs(resolveFunctions(CovData, CoveredFns), OS);
1090 // Print list of not covered functions.
1091 // Line format: <file_name>:<line> <function_name>
1092 static void printNotCoveredFunctions(const SymbolizedCoverage &CovData,
1093 raw_ostream &OS) {
1094 auto NotCoveredFns = computeNotCoveredFunctions(CovData);
1095 printFunctionLocs(resolveFunctions(CovData, NotCoveredFns), OS);
1098 // Read list of files and merges their coverage info.
1099 static void readAndPrintRawCoverage(const std::vector<std::string> &FileNames,
1100 raw_ostream &OS) {
1101 std::vector<std::unique_ptr<RawCoverage>> Covs;
1102 for (const auto &FileName : FileNames) {
1103 auto Cov = RawCoverage::read(FileName);
1104 if (!Cov)
1105 continue;
1106 OS << *Cov.get();
1110 static std::unique_ptr<SymbolizedCoverage>
1111 merge(const std::vector<std::unique_ptr<SymbolizedCoverage>> &Coverages) {
1112 if (Coverages.empty())
1113 return nullptr;
1115 auto Result = std::make_unique<SymbolizedCoverage>();
1117 for (size_t I = 0; I < Coverages.size(); ++I) {
1118 const SymbolizedCoverage &Coverage = *Coverages[I];
1119 std::string Prefix;
1120 if (Coverages.size() > 1) {
1121 // prefix is not needed when there's only one file.
1122 Prefix = utostr(I);
1125 for (const auto &Id : Coverage.CoveredIds) {
1126 Result->CoveredIds.insert(Prefix + Id);
1129 for (const auto &CovPoint : Coverage.Points) {
1130 CoveragePoint NewPoint(CovPoint);
1131 NewPoint.Id = Prefix + CovPoint.Id;
1132 Result->Points.push_back(NewPoint);
1136 if (Coverages.size() == 1) {
1137 Result->BinaryHash = Coverages[0]->BinaryHash;
1140 return Result;
1143 static std::unique_ptr<SymbolizedCoverage>
1144 readSymbolizeAndMergeCmdArguments(std::vector<std::string> FileNames) {
1145 std::vector<std::unique_ptr<SymbolizedCoverage>> Coverages;
1148 // Short name => file name.
1149 std::map<std::string, std::string> ObjFiles;
1150 std::string FirstObjFile;
1151 std::set<std::string> CovFiles;
1153 // Partition input values into coverage/object files.
1154 for (const auto &FileName : FileNames) {
1155 if (isSymbolizedCoverageFile(FileName)) {
1156 Coverages.push_back(SymbolizedCoverage::read(FileName));
1159 auto ErrorOrIsCoverage = isCoverageFile(FileName);
1160 if (!ErrorOrIsCoverage)
1161 continue;
1162 if (ErrorOrIsCoverage.get()) {
1163 CovFiles.insert(FileName);
1164 } else {
1165 auto ShortFileName = llvm::sys::path::filename(FileName);
1166 if (ObjFiles.find(ShortFileName) != ObjFiles.end()) {
1167 fail("Duplicate binary file with a short name: " + ShortFileName);
1170 ObjFiles[ShortFileName] = FileName;
1171 if (FirstObjFile.empty())
1172 FirstObjFile = FileName;
1176 SmallVector<StringRef, 2> Components;
1178 // Object file => list of corresponding coverage file names.
1179 std::map<std::string, std::vector<std::string>> CoverageByObjFile;
1180 for (const auto &FileName : CovFiles) {
1181 auto ShortFileName = llvm::sys::path::filename(FileName);
1182 auto Ok = SancovFileRegex.match(ShortFileName, &Components);
1183 if (!Ok) {
1184 fail("Can't match coverage file name against "
1185 "<module_name>.<pid>.sancov pattern: " +
1186 FileName);
1189 auto Iter = ObjFiles.find(Components[1]);
1190 if (Iter == ObjFiles.end()) {
1191 fail("Object file for coverage not found: " + FileName);
1194 CoverageByObjFile[Iter->second].push_back(FileName);
1197 for (const auto &Pair : ObjFiles) {
1198 auto FileName = Pair.second;
1199 if (CoverageByObjFile.find(FileName) == CoverageByObjFile.end())
1200 errs() << "WARNING: No coverage file for " << FileName << "\n";
1203 // Read raw coverage and symbolize it.
1204 for (const auto &Pair : CoverageByObjFile) {
1205 if (findSanitizerCovFunctions(Pair.first).empty()) {
1206 errs()
1207 << "WARNING: Ignoring " << Pair.first
1208 << " and its coverage because __sanitizer_cov* functions were not "
1209 "found.\n";
1210 continue;
1213 for (const std::string &CoverageFile : Pair.second) {
1214 auto DataOrError = RawCoverage::read(CoverageFile);
1215 failIfError(DataOrError);
1216 Coverages.push_back(symbolize(*DataOrError.get(), Pair.first));
1221 return merge(Coverages);
1224 } // namespace
1226 int main(int Argc, char **Argv) {
1227 // Print stack trace if we signal out.
1228 sys::PrintStackTraceOnErrorSignal(Argv[0]);
1229 PrettyStackTraceProgram X(Argc, Argv);
1230 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
1232 llvm::InitializeAllTargetInfos();
1233 llvm::InitializeAllTargetMCs();
1234 llvm::InitializeAllDisassemblers();
1236 cl::ParseCommandLineOptions(Argc, Argv,
1237 "Sanitizer Coverage Processing Tool (sancov)\n\n"
1238 " This tool can extract various coverage-related information from: \n"
1239 " coverage-instrumented binary files, raw .sancov files and their "
1240 "symbolized .symcov version.\n"
1241 " Depending on chosen action the tool expects different input files:\n"
1242 " -print-coverage-pcs - coverage-instrumented binary files\n"
1243 " -print-coverage - .sancov files\n"
1244 " <other actions> - .sancov files & corresponding binary "
1245 "files, .symcov files\n"
1248 // -print doesn't need object files.
1249 if (Action == PrintAction) {
1250 readAndPrintRawCoverage(ClInputFiles, outs());
1251 return 0;
1252 } else if (Action == PrintCovPointsAction) {
1253 // -print-coverage-points doesn't need coverage files.
1254 for (const std::string &ObjFile : ClInputFiles) {
1255 printCovPoints(ObjFile, outs());
1257 return 0;
1260 auto Coverage = readSymbolizeAndMergeCmdArguments(ClInputFiles);
1261 failIf(!Coverage, "No valid coverage files given.");
1263 switch (Action) {
1264 case CoveredFunctionsAction: {
1265 printCoveredFunctions(*Coverage, outs());
1266 return 0;
1268 case NotCoveredFunctionsAction: {
1269 printNotCoveredFunctions(*Coverage, outs());
1270 return 0;
1272 case StatsAction: {
1273 outs() << computeStats(*Coverage);
1274 return 0;
1276 case MergeAction:
1277 case SymbolizeAction: { // merge & symbolize are synonims.
1278 JSONWriter W(outs());
1279 W << *Coverage;
1280 return 0;
1282 case HtmlReportAction:
1283 errs() << "-html-report option is removed: "
1284 "use -symbolize & coverage-report-server.py instead\n";
1285 return 1;
1286 case PrintAction:
1287 case PrintCovPointsAction:
1288 llvm_unreachable("unsupported action");