[NFC][Py Reformat] Reformat python files in llvm
[llvm-project.git] / llvm / tools / sancov / sancov.cpp
blob9d29e9a13315cedf0df3db047de7dde1b1880c12
1 //===-- sancov.cpp --------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file is a command-line tool for reading and analyzing sanitizer
9 // coverage.
10 //===----------------------------------------------------------------------===//
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringExtras.h"
13 #include "llvm/ADT/Twine.h"
14 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
15 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
16 #include "llvm/MC/MCAsmInfo.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstrAnalysis.h"
21 #include "llvm/MC/MCInstrInfo.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCTargetOptions.h"
26 #include "llvm/MC/TargetRegistry.h"
27 #include "llvm/Object/Archive.h"
28 #include "llvm/Object/Binary.h"
29 #include "llvm/Object/COFF.h"
30 #include "llvm/Object/MachO.h"
31 #include "llvm/Object/ObjectFile.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Errc.h"
35 #include "llvm/Support/ErrorOr.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/InitLLVM.h"
38 #include "llvm/Support/JSON.h"
39 #include "llvm/Support/MD5.h"
40 #include "llvm/Support/MemoryBuffer.h"
41 #include "llvm/Support/Path.h"
42 #include "llvm/Support/Regex.h"
43 #include "llvm/Support/SHA1.h"
44 #include "llvm/Support/SourceMgr.h"
45 #include "llvm/Support/SpecialCaseList.h"
46 #include "llvm/Support/TargetSelect.h"
47 #include "llvm/Support/VirtualFileSystem.h"
48 #include "llvm/Support/YAMLParser.h"
49 #include "llvm/Support/raw_ostream.h"
51 #include <set>
52 #include <vector>
54 using namespace llvm;
56 namespace {
58 // --------- COMMAND LINE FLAGS ---------
60 cl::OptionCategory Cat("sancov Options");
// Operation selected on the command line. The enumerators are bound to their
// flag spellings by the `Action` option.
enum ActionType {
  CoveredFunctionsAction,    // -covered-functions
  HtmlReportAction,          // -html-report (removed)
  MergeAction,               // -merge
  NotCoveredFunctionsAction, // -not-covered-functions
  PrintAction,               // -print
  PrintCovPointsAction,      // -print-coverage-pcs
  StatsAction,               // -print-coverage-stats
  SymbolizeAction            // -symbolize
};
73 cl::opt<ActionType> Action(
74 cl::desc("Action (required)"), cl::Required,
75 cl::values(
76 clEnumValN(PrintAction, "print", "Print coverage addresses"),
77 clEnumValN(PrintCovPointsAction, "print-coverage-pcs",
78 "Print coverage instrumentation points addresses."),
79 clEnumValN(CoveredFunctionsAction, "covered-functions",
80 "Print all covered funcions."),
81 clEnumValN(NotCoveredFunctionsAction, "not-covered-functions",
82 "Print all not covered funcions."),
83 clEnumValN(StatsAction, "print-coverage-stats",
84 "Print coverage statistics."),
85 clEnumValN(HtmlReportAction, "html-report",
86 "REMOVED. Use -symbolize & coverage-report-server.py."),
87 clEnumValN(SymbolizeAction, "symbolize",
88 "Produces a symbolized JSON report from binary report."),
89 clEnumValN(MergeAction, "merge", "Merges reports.")),
90 cl::cat(Cat));
92 static cl::list<std::string>
93 ClInputFiles(cl::Positional, cl::OneOrMore,
94 cl::desc("<action> <binary files...> <.sancov files...> "
95 "<.symcov files...>"),
96 cl::cat(Cat));
98 static cl::opt<bool> ClDemangle("demangle", cl::init(true),
99 cl::desc("Print demangled function name"),
100 cl::cat(Cat));
102 static cl::opt<bool>
103 ClSkipDeadFiles("skip-dead-files", cl::init(true),
104 cl::desc("Do not list dead source files in reports"),
105 cl::cat(Cat));
107 static cl::opt<std::string>
108 ClStripPathPrefix("strip_path_prefix", cl::init(""),
109 cl::desc("Strip this prefix from file paths in reports"),
110 cl::cat(Cat));
112 static cl::opt<std::string>
113 ClIgnorelist("ignorelist", cl::init(""),
114 cl::desc("Ignorelist file (sanitizer ignorelist format)"),
115 cl::cat(Cat));
117 static cl::opt<bool> ClUseDefaultIgnorelist(
118 "use_default_ignorelist", cl::init(true), cl::Hidden,
119 cl::desc("Controls if default ignorelist should be used"), cl::cat(Cat));
// Built-in ignorelist, one rule per line: sanitizer runtime functions, files
// under /usr/include, and libc++ sources.
static const char *const DefaultIgnorelistStr =
    "fun:__sanitizer_.*\n"
    "src:/usr/include/.*\n"
    "src:.*/libc\\+\\+/.*\n";
125 // --------- FORMAT SPECIFICATION ---------
// Leading 8 bytes of a binary .sancov file, overlaid directly on the file
// buffer by the readers below.
struct FileHeader {
  uint32_t Bitness; // Compared against Bitness32 / Bitness64.
  uint32_t Magic;   // Compared against BinCoverageMagic.
};
// Expected value of FileHeader::Magic.
static const uint32_t BinCoverageMagic = 0xC0BFFFFF;
// FileHeader::Bitness value selecting 32-bit address entries.
static const uint32_t Bitness32 = 0xFFFFFF32;
// FileHeader::Bitness value selecting 64-bit address entries.
static const uint32_t Bitness64 = 0xFFFFFF64;
136 static const Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov");
137 static const Regex SymcovFileRegex(".*\\.symcov");
139 // --------- MAIN DATASTRUCTURES ----------
141 // Contents of .sancov file: list of coverage point addresses that were
142 // executed.
143 struct RawCoverage {
144 explicit RawCoverage(std::unique_ptr<std::set<uint64_t>> Addrs)
145 : Addrs(std::move(Addrs)) {}
147 // Read binary .sancov file.
148 static ErrorOr<std::unique_ptr<RawCoverage>>
149 read(const std::string &FileName);
151 std::unique_ptr<std::set<uint64_t>> Addrs;
154 // Coverage point has an opaque Id and corresponds to multiple source locations.
155 struct CoveragePoint {
156 explicit CoveragePoint(const std::string &Id) : Id(Id) {}
158 std::string Id;
159 SmallVector<DILineInfo, 1> Locs;
162 // Symcov file content: set of covered Ids plus information about all available
163 // coverage points.
164 struct SymbolizedCoverage {
165 // Read json .symcov file.
166 static std::unique_ptr<SymbolizedCoverage> read(const std::string &InputFile);
168 std::set<std::string> CoveredIds;
169 std::string BinaryHash;
170 std::vector<CoveragePoint> Points;
// Summary counters printed by the stats action. Field order matters: the
// struct is brace-initialised positionally.
struct CoverageStats {
  size_t AllPoints; // Number of coverage points.
  size_t CovPoints; // Number of covered coverage points.
  size_t AllFns;    // Number of functions.
  size_t CovFns;    // Number of covered functions.
};
180 // --------- ERROR HANDLING ---------
182 static void fail(const llvm::Twine &E) {
183 errs() << "ERROR: " << E << "\n";
184 exit(1);
187 static void failIf(bool B, const llvm::Twine &E) {
188 if (B)
189 fail(E);
192 static void failIfError(std::error_code Error) {
193 if (!Error)
194 return;
195 errs() << "ERROR: " << Error.message() << "(" << Error.value() << ")\n";
196 exit(1);
199 template <typename T> static void failIfError(const ErrorOr<T> &E) {
200 failIfError(E.getError());
203 static void failIfError(Error Err) {
204 if (Err) {
205 logAllUnhandledErrors(std::move(Err), errs(), "ERROR: ");
206 exit(1);
210 template <typename T> static void failIfError(Expected<T> &E) {
211 failIfError(E.takeError());
214 static void failIfNotEmpty(const llvm::Twine &E) {
215 if (E.str().empty())
216 return;
217 fail(E);
220 template <typename T>
221 static void failIfEmpty(const std::unique_ptr<T> &Ptr,
222 const std::string &Message) {
223 if (Ptr.get())
224 return;
225 fail(Message);
228 // ----------- Coverage I/O ----------
// Interprets the bytes in [Start, End) as a packed array of T and inserts
// each element, widened to uint64_t, into *Ints.
//
// Only whole elements are read: if the byte count is not a multiple of
// sizeof(T), the trailing partial element is ignored. Deriving the end from an
// explicit element count avoids forming a misaligned `const T *` from End. An
// empty or inverted range inserts nothing.
template <typename T>
static void readInts(const char *Start, const char *End,
                     std::set<uint64_t> *Ints) {
  if (End <= Start)
    return;
  const size_t Count = static_cast<size_t>(End - Start) / sizeof(T);
  const T *First = reinterpret_cast<const T *>(Start);
  Ints->insert(First, First + Count);
}
237 ErrorOr<std::unique_ptr<RawCoverage>>
238 RawCoverage::read(const std::string &FileName) {
239 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
240 MemoryBuffer::getFile(FileName);
241 if (!BufOrErr)
242 return BufOrErr.getError();
243 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
244 if (Buf->getBufferSize() < 8) {
245 errs() << "File too small (<8): " << Buf->getBufferSize() << '\n';
246 return make_error_code(errc::illegal_byte_sequence);
248 const FileHeader *Header =
249 reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
251 if (Header->Magic != BinCoverageMagic) {
252 errs() << "Wrong magic: " << Header->Magic << '\n';
253 return make_error_code(errc::illegal_byte_sequence);
256 auto Addrs = std::make_unique<std::set<uint64_t>>();
258 switch (Header->Bitness) {
259 case Bitness64:
260 readInts<uint64_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
261 Addrs.get());
262 break;
263 case Bitness32:
264 readInts<uint32_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
265 Addrs.get());
266 break;
267 default:
268 errs() << "Unsupported bitness: " << Header->Bitness << '\n';
269 return make_error_code(errc::illegal_byte_sequence);
272 // Ignore slots that are zero, so a runtime implementation is not required
273 // to compactify the data.
274 Addrs->erase(0);
276 return std::unique_ptr<RawCoverage>(new RawCoverage(std::move(Addrs)));
279 // Print coverage addresses.
280 raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) {
281 for (auto Addr : *CoverageData.Addrs) {
282 OS << "0x";
283 OS.write_hex(Addr);
284 OS << "\n";
286 return OS;
289 static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) {
290 OS << "all-edges: " << Stats.AllPoints << "\n";
291 OS << "cov-edges: " << Stats.CovPoints << "\n";
292 OS << "all-functions: " << Stats.AllFns << "\n";
293 OS << "cov-functions: " << Stats.CovFns << "\n";
294 return OS;
297 // Output symbolized information for coverage points in JSON.
298 // Format:
299 // {
300 // '<file_name>' : {
301 // '<function_name>' : {
302 // '<point_id'> : '<line_number>:'<column_number'.
303 // ....
304 // }
305 // }
306 // }
307 static void operator<<(json::OStream &W,
308 const std::vector<CoveragePoint> &Points) {
309 // Group points by file.
310 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFile;
311 for (const auto &Point : Points) {
312 for (const DILineInfo &Loc : Point.Locs) {
313 PointsByFile[Loc.FileName].push_back(&Point);
317 for (const auto &P : PointsByFile) {
318 std::string FileName = P.first;
319 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFn;
320 for (auto PointPtr : P.second) {
321 for (const DILineInfo &Loc : PointPtr->Locs) {
322 PointsByFn[Loc.FunctionName].push_back(PointPtr);
326 W.attributeObject(P.first, [&] {
327 // Group points by function.
328 for (const auto &P : PointsByFn) {
329 std::string FunctionName = P.first;
330 std::set<std::string> WrittenIds;
332 W.attributeObject(FunctionName, [&] {
333 for (const CoveragePoint *Point : P.second) {
334 for (const auto &Loc : Point->Locs) {
335 if (Loc.FileName != FileName || Loc.FunctionName != FunctionName)
336 continue;
337 if (WrittenIds.find(Point->Id) != WrittenIds.end())
338 continue;
340 // Output <point_id> : "<line>:<col>".
341 WrittenIds.insert(Point->Id);
342 W.attribute(Point->Id,
343 (utostr(Loc.Line) + ":" + utostr(Loc.Column)));
352 static void operator<<(json::OStream &W, const SymbolizedCoverage &C) {
353 W.object([&] {
354 W.attributeArray("covered-points", [&] {
355 for (const std::string &P : C.CoveredIds) {
356 W.value(P);
359 W.attribute("binary-hash", C.BinaryHash);
360 W.attributeObject("point-symbol-info", [&] { W << C.Points; });
364 static std::string parseScalarString(yaml::Node *N) {
365 SmallString<64> StringStorage;
366 yaml::ScalarNode *S = dyn_cast<yaml::ScalarNode>(N);
367 failIf(!S, "expected string");
368 return std::string(S->getValue(StringStorage));
371 std::unique_ptr<SymbolizedCoverage>
372 SymbolizedCoverage::read(const std::string &InputFile) {
373 auto Coverage(std::make_unique<SymbolizedCoverage>());
375 std::map<std::string, CoveragePoint> Points;
376 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
377 MemoryBuffer::getFile(InputFile);
378 failIfError(BufOrErr);
380 SourceMgr SM;
381 yaml::Stream S(**BufOrErr, SM);
383 yaml::document_iterator DI = S.begin();
384 failIf(DI == S.end(), "empty document: " + InputFile);
385 yaml::Node *Root = DI->getRoot();
386 failIf(!Root, "expecting root node: " + InputFile);
387 yaml::MappingNode *Top = dyn_cast<yaml::MappingNode>(Root);
388 failIf(!Top, "expecting mapping node: " + InputFile);
390 for (auto &KVNode : *Top) {
391 auto Key = parseScalarString(KVNode.getKey());
393 if (Key == "covered-points") {
394 yaml::SequenceNode *Points =
395 dyn_cast<yaml::SequenceNode>(KVNode.getValue());
396 failIf(!Points, "expected array: " + InputFile);
398 for (auto I = Points->begin(), E = Points->end(); I != E; ++I) {
399 Coverage->CoveredIds.insert(parseScalarString(&*I));
401 } else if (Key == "binary-hash") {
402 Coverage->BinaryHash = parseScalarString(KVNode.getValue());
403 } else if (Key == "point-symbol-info") {
404 yaml::MappingNode *PointSymbolInfo =
405 dyn_cast<yaml::MappingNode>(KVNode.getValue());
406 failIf(!PointSymbolInfo, "expected mapping node: " + InputFile);
408 for (auto &FileKVNode : *PointSymbolInfo) {
409 auto Filename = parseScalarString(FileKVNode.getKey());
411 yaml::MappingNode *FileInfo =
412 dyn_cast<yaml::MappingNode>(FileKVNode.getValue());
413 failIf(!FileInfo, "expected mapping node: " + InputFile);
415 for (auto &FunctionKVNode : *FileInfo) {
416 auto FunctionName = parseScalarString(FunctionKVNode.getKey());
418 yaml::MappingNode *FunctionInfo =
419 dyn_cast<yaml::MappingNode>(FunctionKVNode.getValue());
420 failIf(!FunctionInfo, "expected mapping node: " + InputFile);
422 for (auto &PointKVNode : *FunctionInfo) {
423 auto PointId = parseScalarString(PointKVNode.getKey());
424 auto Loc = parseScalarString(PointKVNode.getValue());
426 size_t ColonPos = Loc.find(':');
427 failIf(ColonPos == std::string::npos, "expected ':': " + InputFile);
429 auto LineStr = Loc.substr(0, ColonPos);
430 auto ColStr = Loc.substr(ColonPos + 1, Loc.size());
432 if (Points.find(PointId) == Points.end())
433 Points.insert(std::make_pair(PointId, CoveragePoint(PointId)));
435 DILineInfo LineInfo;
436 LineInfo.FileName = Filename;
437 LineInfo.FunctionName = FunctionName;
438 char *End;
439 LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10);
440 LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10);
442 CoveragePoint *CoveragePoint = &Points.find(PointId)->second;
443 CoveragePoint->Locs.push_back(LineInfo);
447 } else {
448 errs() << "Ignoring unknown key: " << Key << "\n";
452 for (auto &KV : Points) {
453 Coverage->Points.push_back(KV.second);
456 return Coverage;
459 // ---------- MAIN FUNCTIONALITY ----------
461 std::string stripPathPrefix(std::string Path) {
462 if (ClStripPathPrefix.empty())
463 return Path;
464 size_t Pos = Path.find(ClStripPathPrefix);
465 if (Pos == std::string::npos)
466 return Path;
467 return Path.substr(Pos + ClStripPathPrefix.size());
470 static std::unique_ptr<symbolize::LLVMSymbolizer> createSymbolizer() {
471 symbolize::LLVMSymbolizer::Options SymbolizerOptions;
472 SymbolizerOptions.Demangle = ClDemangle;
473 SymbolizerOptions.UseSymbolTable = true;
474 return std::unique_ptr<symbolize::LLVMSymbolizer>(
475 new symbolize::LLVMSymbolizer(SymbolizerOptions));
478 static std::string normalizeFilename(const std::string &FileName) {
479 SmallString<256> S(FileName);
480 sys::path::remove_dots(S, /* remove_dot_dot */ true);
481 return stripPathPrefix(sys::path::convert_to_slash(std::string(S)));
484 class Ignorelists {
485 public:
486 Ignorelists()
487 : DefaultIgnorelist(createDefaultIgnorelist()),
488 UserIgnorelist(createUserIgnorelist()) {}
490 bool isIgnorelisted(const DILineInfo &I) {
491 if (DefaultIgnorelist &&
492 DefaultIgnorelist->inSection("sancov", "fun", I.FunctionName))
493 return true;
494 if (DefaultIgnorelist &&
495 DefaultIgnorelist->inSection("sancov", "src", I.FileName))
496 return true;
497 if (UserIgnorelist &&
498 UserIgnorelist->inSection("sancov", "fun", I.FunctionName))
499 return true;
500 if (UserIgnorelist &&
501 UserIgnorelist->inSection("sancov", "src", I.FileName))
502 return true;
503 return false;
506 private:
507 static std::unique_ptr<SpecialCaseList> createDefaultIgnorelist() {
508 if (!ClUseDefaultIgnorelist)
509 return std::unique_ptr<SpecialCaseList>();
510 std::unique_ptr<MemoryBuffer> MB =
511 MemoryBuffer::getMemBuffer(DefaultIgnorelistStr);
512 std::string Error;
513 auto Ignorelist = SpecialCaseList::create(MB.get(), Error);
514 failIfNotEmpty(Error);
515 return Ignorelist;
518 static std::unique_ptr<SpecialCaseList> createUserIgnorelist() {
519 if (ClIgnorelist.empty())
520 return std::unique_ptr<SpecialCaseList>();
521 return SpecialCaseList::createOrDie({{ClIgnorelist}},
522 *vfs::getRealFileSystem());
524 std::unique_ptr<SpecialCaseList> DefaultIgnorelist;
525 std::unique_ptr<SpecialCaseList> UserIgnorelist;
528 static std::vector<CoveragePoint>
529 getCoveragePoints(const std::string &ObjectFile,
530 const std::set<uint64_t> &Addrs,
531 const std::set<uint64_t> &CoveredAddrs) {
532 std::vector<CoveragePoint> Result;
533 auto Symbolizer(createSymbolizer());
534 Ignorelists Ig;
536 std::set<std::string> CoveredFiles;
537 if (ClSkipDeadFiles) {
538 for (auto Addr : CoveredAddrs) {
539 // TODO: it would be neccessary to set proper section index here.
540 // object::SectionedAddress::UndefSection works for only absolute
541 // addresses.
542 object::SectionedAddress ModuleAddress = {
543 Addr, object::SectionedAddress::UndefSection};
545 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress);
546 failIfError(LineInfo);
547 CoveredFiles.insert(LineInfo->FileName);
548 auto InliningInfo =
549 Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress);
550 failIfError(InliningInfo);
551 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
552 auto FrameInfo = InliningInfo->getFrame(I);
553 CoveredFiles.insert(FrameInfo.FileName);
558 for (auto Addr : Addrs) {
559 std::set<DILineInfo> Infos; // deduplicate debug info.
561 // TODO: it would be neccessary to set proper section index here.
562 // object::SectionedAddress::UndefSection works for only absolute addresses.
563 object::SectionedAddress ModuleAddress = {
564 Addr, object::SectionedAddress::UndefSection};
566 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, ModuleAddress);
567 failIfError(LineInfo);
568 if (ClSkipDeadFiles &&
569 CoveredFiles.find(LineInfo->FileName) == CoveredFiles.end())
570 continue;
571 LineInfo->FileName = normalizeFilename(LineInfo->FileName);
572 if (Ig.isIgnorelisted(*LineInfo))
573 continue;
575 auto Id = utohexstr(Addr, true);
576 auto Point = CoveragePoint(Id);
577 Infos.insert(*LineInfo);
578 Point.Locs.push_back(*LineInfo);
580 auto InliningInfo =
581 Symbolizer->symbolizeInlinedCode(ObjectFile, ModuleAddress);
582 failIfError(InliningInfo);
583 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
584 auto FrameInfo = InliningInfo->getFrame(I);
585 if (ClSkipDeadFiles &&
586 CoveredFiles.find(FrameInfo.FileName) == CoveredFiles.end())
587 continue;
588 FrameInfo.FileName = normalizeFilename(FrameInfo.FileName);
589 if (Ig.isIgnorelisted(FrameInfo))
590 continue;
591 if (Infos.find(FrameInfo) == Infos.end()) {
592 Infos.insert(FrameInfo);
593 Point.Locs.push_back(FrameInfo);
597 Result.push_back(Point);
600 return Result;
603 static bool isCoveragePointSymbol(StringRef Name) {
604 return Name == "__sanitizer_cov" || Name == "__sanitizer_cov_with_check" ||
605 Name == "__sanitizer_cov_trace_func_enter" ||
606 Name == "__sanitizer_cov_trace_pc_guard" ||
607 // Mac has '___' prefix
608 Name == "___sanitizer_cov" || Name == "___sanitizer_cov_with_check" ||
609 Name == "___sanitizer_cov_trace_func_enter" ||
610 Name == "___sanitizer_cov_trace_pc_guard";
613 // Locate __sanitizer_cov* function addresses inside the stubs table on MachO.
614 static void findMachOIndirectCovFunctions(const object::MachOObjectFile &O,
615 std::set<uint64_t> *Result) {
616 MachO::dysymtab_command Dysymtab = O.getDysymtabLoadCommand();
617 MachO::symtab_command Symtab = O.getSymtabLoadCommand();
619 for (const auto &Load : O.load_commands()) {
620 if (Load.C.cmd == MachO::LC_SEGMENT_64) {
621 MachO::segment_command_64 Seg = O.getSegment64LoadCommand(Load);
622 for (unsigned J = 0; J < Seg.nsects; ++J) {
623 MachO::section_64 Sec = O.getSection64(Load, J);
625 uint32_t SectionType = Sec.flags & MachO::SECTION_TYPE;
626 if (SectionType == MachO::S_SYMBOL_STUBS) {
627 uint32_t Stride = Sec.reserved2;
628 uint32_t Cnt = Sec.size / Stride;
629 uint32_t N = Sec.reserved1;
630 for (uint32_t J = 0; J < Cnt && N + J < Dysymtab.nindirectsyms; J++) {
631 uint32_t IndirectSymbol =
632 O.getIndirectSymbolTableEntry(Dysymtab, N + J);
633 uint64_t Addr = Sec.addr + J * Stride;
634 if (IndirectSymbol < Symtab.nsyms) {
635 object::SymbolRef Symbol = *(O.getSymbolByIndex(IndirectSymbol));
636 Expected<StringRef> Name = Symbol.getName();
637 failIfError(Name);
638 if (isCoveragePointSymbol(Name.get())) {
639 Result->insert(Addr);
646 if (Load.C.cmd == MachO::LC_SEGMENT) {
647 errs() << "ERROR: 32 bit MachO binaries not supported\n";
652 // Locate __sanitizer_cov* function addresses that are used for coverage
653 // reporting.
654 static std::set<uint64_t>
655 findSanitizerCovFunctions(const object::ObjectFile &O) {
656 std::set<uint64_t> Result;
658 for (const object::SymbolRef &Symbol : O.symbols()) {
659 Expected<uint64_t> AddressOrErr = Symbol.getAddress();
660 failIfError(AddressOrErr);
661 uint64_t Address = AddressOrErr.get();
663 Expected<StringRef> NameOrErr = Symbol.getName();
664 failIfError(NameOrErr);
665 StringRef Name = NameOrErr.get();
667 Expected<uint32_t> FlagsOrErr = Symbol.getFlags();
668 // TODO: Test this error.
669 failIfError(FlagsOrErr);
670 uint32_t Flags = FlagsOrErr.get();
672 if (!(Flags & object::BasicSymbolRef::SF_Undefined) &&
673 isCoveragePointSymbol(Name)) {
674 Result.insert(Address);
678 if (const auto *CO = dyn_cast<object::COFFObjectFile>(&O)) {
679 for (const object::ExportDirectoryEntryRef &Export :
680 CO->export_directories()) {
681 uint32_t RVA;
682 failIfError(Export.getExportRVA(RVA));
684 StringRef Name;
685 failIfError(Export.getSymbolName(Name));
687 if (isCoveragePointSymbol(Name))
688 Result.insert(CO->getImageBase() + RVA);
692 if (const auto *MO = dyn_cast<object::MachOObjectFile>(&O)) {
693 findMachOIndirectCovFunctions(*MO, &Result);
696 return Result;
699 // Ported from
700 // compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h:GetPreviousInstructionPc
701 // GetPreviousInstructionPc.
702 static uint64_t getPreviousInstructionPc(uint64_t PC,
703 Triple TheTriple) {
704 if (TheTriple.isARM())
705 return (PC - 3) & (~1);
706 if (TheTriple.isMIPS() || TheTriple.isSPARC())
707 return PC - 8;
708 if (TheTriple.isRISCV())
709 return PC - 2;
710 if (TheTriple.isX86() || TheTriple.isSystemZ())
711 return PC - 1;
712 return PC - 4;
715 // Locate addresses of all coverage points in a file. Coverage point
716 // is defined as the 'address of instruction following __sanitizer_cov
717 // call - 1'.
718 static void getObjectCoveragePoints(const object::ObjectFile &O,
719 std::set<uint64_t> *Addrs) {
720 Triple TheTriple("unknown-unknown-unknown");
721 TheTriple.setArch(Triple::ArchType(O.getArch()));
722 auto TripleName = TheTriple.getTriple();
724 std::string Error;
725 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
726 failIfNotEmpty(Error);
728 std::unique_ptr<const MCSubtargetInfo> STI(
729 TheTarget->createMCSubtargetInfo(TripleName, "", ""));
730 failIfEmpty(STI, "no subtarget info for target " + TripleName);
732 std::unique_ptr<const MCRegisterInfo> MRI(
733 TheTarget->createMCRegInfo(TripleName));
734 failIfEmpty(MRI, "no register info for target " + TripleName);
736 MCTargetOptions MCOptions;
737 std::unique_ptr<const MCAsmInfo> AsmInfo(
738 TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
739 failIfEmpty(AsmInfo, "no asm info for target " + TripleName);
741 MCContext Ctx(TheTriple, AsmInfo.get(), MRI.get(), STI.get());
742 std::unique_ptr<MCDisassembler> DisAsm(
743 TheTarget->createMCDisassembler(*STI, Ctx));
744 failIfEmpty(DisAsm, "no disassembler info for target " + TripleName);
746 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
747 failIfEmpty(MII, "no instruction info for target " + TripleName);
749 std::unique_ptr<const MCInstrAnalysis> MIA(
750 TheTarget->createMCInstrAnalysis(MII.get()));
751 failIfEmpty(MIA, "no instruction analysis info for target " + TripleName);
753 auto SanCovAddrs = findSanitizerCovFunctions(O);
754 if (SanCovAddrs.empty())
755 fail("__sanitizer_cov* functions not found");
757 for (object::SectionRef Section : O.sections()) {
758 if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same.
759 continue;
760 uint64_t SectionAddr = Section.getAddress();
761 uint64_t SectSize = Section.getSize();
762 if (!SectSize)
763 continue;
765 Expected<StringRef> BytesStr = Section.getContents();
766 failIfError(BytesStr);
767 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*BytesStr);
769 for (uint64_t Index = 0, Size = 0; Index < Section.getSize();
770 Index += Size) {
771 MCInst Inst;
772 ArrayRef<uint8_t> ThisBytes = Bytes.slice(Index);
773 uint64_t ThisAddr = SectionAddr + Index;
774 if (!DisAsm->getInstruction(Inst, Size, ThisBytes, ThisAddr, nulls())) {
775 if (Size == 0)
776 Size = std::min<uint64_t>(
777 ThisBytes.size(),
778 DisAsm->suggestBytesToSkip(ThisBytes, ThisAddr));
779 continue;
781 uint64_t Addr = Index + SectionAddr;
782 // Sanitizer coverage uses the address of the next instruction - 1.
783 uint64_t CovPoint = getPreviousInstructionPc(Addr + Size, TheTriple);
784 uint64_t Target;
785 if (MIA->isCall(Inst) &&
786 MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) &&
787 SanCovAddrs.find(Target) != SanCovAddrs.end())
788 Addrs->insert(CovPoint);
793 static void
794 visitObjectFiles(const object::Archive &A,
795 function_ref<void(const object::ObjectFile &)> Fn) {
796 Error Err = Error::success();
797 for (auto &C : A.children(Err)) {
798 Expected<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary();
799 failIfError(ChildOrErr);
800 if (auto *O = dyn_cast<object::ObjectFile>(&*ChildOrErr.get()))
801 Fn(*O);
802 else
803 failIfError(object::object_error::invalid_file_type);
805 failIfError(std::move(Err));
808 static void
809 visitObjectFiles(const std::string &FileName,
810 function_ref<void(const object::ObjectFile &)> Fn) {
811 Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
812 object::createBinary(FileName);
813 if (!BinaryOrErr)
814 failIfError(BinaryOrErr);
816 object::Binary &Binary = *BinaryOrErr.get().getBinary();
817 if (object::Archive *A = dyn_cast<object::Archive>(&Binary))
818 visitObjectFiles(*A, Fn);
819 else if (object::ObjectFile *O = dyn_cast<object::ObjectFile>(&Binary))
820 Fn(*O);
821 else
822 failIfError(object::object_error::invalid_file_type);
825 static std::set<uint64_t>
826 findSanitizerCovFunctions(const std::string &FileName) {
827 std::set<uint64_t> Result;
828 visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
829 auto Addrs = findSanitizerCovFunctions(O);
830 Result.insert(Addrs.begin(), Addrs.end());
832 return Result;
835 // Locate addresses of all coverage points in a file. Coverage point
836 // is defined as the 'address of instruction following __sanitizer_cov
837 // call - 1'.
838 static std::set<uint64_t> findCoveragePointAddrs(const std::string &FileName) {
839 std::set<uint64_t> Result;
840 visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
841 getObjectCoveragePoints(O, &Result);
843 return Result;
846 static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) {
847 for (uint64_t Addr : findCoveragePointAddrs(ObjFile)) {
848 OS << "0x";
849 OS.write_hex(Addr);
850 OS << "\n";
854 static ErrorOr<bool> isCoverageFile(const std::string &FileName) {
855 auto ShortFileName = llvm::sys::path::filename(FileName);
856 if (!SancovFileRegex.match(ShortFileName))
857 return false;
859 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
860 MemoryBuffer::getFile(FileName);
861 if (!BufOrErr) {
862 errs() << "Warning: " << BufOrErr.getError().message() << "("
863 << BufOrErr.getError().value()
864 << "), filename: " << llvm::sys::path::filename(FileName) << "\n";
865 return BufOrErr.getError();
867 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
868 if (Buf->getBufferSize() < 8) {
869 return false;
871 const FileHeader *Header =
872 reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
873 return Header->Magic == BinCoverageMagic;
876 static bool isSymbolizedCoverageFile(const std::string &FileName) {
877 auto ShortFileName = llvm::sys::path::filename(FileName);
878 return SymcovFileRegex.match(ShortFileName);
881 static std::unique_ptr<SymbolizedCoverage>
882 symbolize(const RawCoverage &Data, const std::string ObjectFile) {
883 auto Coverage = std::make_unique<SymbolizedCoverage>();
885 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
886 MemoryBuffer::getFile(ObjectFile);
887 failIfError(BufOrErr);
888 SHA1 Hasher;
889 Hasher.update((*BufOrErr)->getBuffer());
890 Coverage->BinaryHash = toHex(Hasher.final());
892 Ignorelists Ig;
893 auto Symbolizer(createSymbolizer());
895 for (uint64_t Addr : *Data.Addrs) {
896 // TODO: it would be neccessary to set proper section index here.
897 // object::SectionedAddress::UndefSection works for only absolute addresses.
898 auto LineInfo = Symbolizer->symbolizeCode(
899 ObjectFile, {Addr, object::SectionedAddress::UndefSection});
900 failIfError(LineInfo);
901 if (Ig.isIgnorelisted(*LineInfo))
902 continue;
904 Coverage->CoveredIds.insert(utohexstr(Addr, true));
907 std::set<uint64_t> AllAddrs = findCoveragePointAddrs(ObjectFile);
908 if (!std::includes(AllAddrs.begin(), AllAddrs.end(), Data.Addrs->begin(),
909 Data.Addrs->end())) {
910 fail("Coverage points in binary and .sancov file do not match.");
912 Coverage->Points = getCoveragePoints(ObjectFile, AllAddrs, *Data.Addrs);
913 return Coverage;
// A (file, function) pair, ordered by file name first and function name
// second so that it can be used as a set/map key.
struct FileFn {
  bool operator<(const FileFn &RHS) const {
    if (FileName != RHS.FileName)
      return FileName < RHS.FileName;
    return FunctionName < RHS.FunctionName;
  }

  std::string FileName;
  std::string FunctionName;
};
926 static std::set<FileFn>
927 computeFunctions(const std::vector<CoveragePoint> &Points) {
928 std::set<FileFn> Fns;
929 for (const auto &Point : Points) {
930 for (const auto &Loc : Point.Locs) {
931 Fns.insert(FileFn{Loc.FileName, Loc.FunctionName});
934 return Fns;
937 static std::set<FileFn>
938 computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) {
939 auto Fns = computeFunctions(Coverage.Points);
941 for (const auto &Point : Coverage.Points) {
942 if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
943 continue;
945 for (const auto &Loc : Point.Locs) {
946 Fns.erase(FileFn{Loc.FileName, Loc.FunctionName});
950 return Fns;
953 static std::set<FileFn>
954 computeCoveredFunctions(const SymbolizedCoverage &Coverage) {
955 auto AllFns = computeFunctions(Coverage.Points);
956 std::set<FileFn> Result;
958 for (const auto &Point : Coverage.Points) {
959 if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
960 continue;
962 for (const auto &Loc : Point.Locs) {
963 Result.insert(FileFn{Loc.FileName, Loc.FunctionName});
967 return Result;
970 typedef std::map<FileFn, std::pair<uint32_t, uint32_t>> FunctionLocs;
971 // finds first location in a file for each function.
972 static FunctionLocs resolveFunctions(const SymbolizedCoverage &Coverage,
973 const std::set<FileFn> &Fns) {
974 FunctionLocs Result;
975 for (const auto &Point : Coverage.Points) {
976 for (const auto &Loc : Point.Locs) {
977 FileFn Fn = FileFn{Loc.FileName, Loc.FunctionName};
978 if (Fns.find(Fn) == Fns.end())
979 continue;
981 auto P = std::make_pair(Loc.Line, Loc.Column);
982 auto I = Result.find(Fn);
983 if (I == Result.end() || I->second > P) {
984 Result[Fn] = P;
988 return Result;
991 static void printFunctionLocs(const FunctionLocs &FnLocs, raw_ostream &OS) {
992 for (const auto &P : FnLocs) {
993 OS << stripPathPrefix(P.first.FileName) << ":" << P.second.first << " "
994 << P.first.FunctionName << "\n";
997 CoverageStats computeStats(const SymbolizedCoverage &Coverage) {
998 CoverageStats Stats = {Coverage.Points.size(), Coverage.CoveredIds.size(),
999 computeFunctions(Coverage.Points).size(),
1000 computeCoveredFunctions(Coverage).size()};
1001 return Stats;
1004 // Print list of covered functions.
1005 // Line format: <file_name>:<line> <function_name>
1006 static void printCoveredFunctions(const SymbolizedCoverage &CovData,
1007 raw_ostream &OS) {
1008 auto CoveredFns = computeCoveredFunctions(CovData);
1009 printFunctionLocs(resolveFunctions(CovData, CoveredFns), OS);
1012 // Print list of not covered functions.
1013 // Line format: <file_name>:<line> <function_name>
1014 static void printNotCoveredFunctions(const SymbolizedCoverage &CovData,
1015 raw_ostream &OS) {
1016 auto NotCoveredFns = computeNotCoveredFunctions(CovData);
1017 printFunctionLocs(resolveFunctions(CovData, NotCoveredFns), OS);
1020 // Read list of files and merges their coverage info.
1021 static void readAndPrintRawCoverage(const std::vector<std::string> &FileNames,
1022 raw_ostream &OS) {
1023 std::vector<std::unique_ptr<RawCoverage>> Covs;
1024 for (const auto &FileName : FileNames) {
1025 auto Cov = RawCoverage::read(FileName);
1026 if (!Cov)
1027 continue;
1028 OS << *Cov.get();
1032 static std::unique_ptr<SymbolizedCoverage>
1033 merge(const std::vector<std::unique_ptr<SymbolizedCoverage>> &Coverages) {
1034 if (Coverages.empty())
1035 return nullptr;
1037 auto Result = std::make_unique<SymbolizedCoverage>();
1039 for (size_t I = 0; I < Coverages.size(); ++I) {
1040 const SymbolizedCoverage &Coverage = *Coverages[I];
1041 std::string Prefix;
1042 if (Coverages.size() > 1) {
1043 // prefix is not needed when there's only one file.
1044 Prefix = utostr(I);
1047 for (const auto &Id : Coverage.CoveredIds) {
1048 Result->CoveredIds.insert(Prefix + Id);
1051 for (const auto &CovPoint : Coverage.Points) {
1052 CoveragePoint NewPoint(CovPoint);
1053 NewPoint.Id = Prefix + CovPoint.Id;
1054 Result->Points.push_back(NewPoint);
1058 if (Coverages.size() == 1) {
1059 Result->BinaryHash = Coverages[0]->BinaryHash;
1062 return Result;
1065 static std::unique_ptr<SymbolizedCoverage>
1066 readSymbolizeAndMergeCmdArguments(std::vector<std::string> FileNames) {
1067 std::vector<std::unique_ptr<SymbolizedCoverage>> Coverages;
1070 // Short name => file name.
1071 std::map<std::string, std::string> ObjFiles;
1072 std::string FirstObjFile;
1073 std::set<std::string> CovFiles;
1075 // Partition input values into coverage/object files.
1076 for (const auto &FileName : FileNames) {
1077 if (isSymbolizedCoverageFile(FileName)) {
1078 Coverages.push_back(SymbolizedCoverage::read(FileName));
1081 auto ErrorOrIsCoverage = isCoverageFile(FileName);
1082 if (!ErrorOrIsCoverage)
1083 continue;
1084 if (ErrorOrIsCoverage.get()) {
1085 CovFiles.insert(FileName);
1086 } else {
1087 auto ShortFileName = llvm::sys::path::filename(FileName);
1088 if (ObjFiles.find(std::string(ShortFileName)) != ObjFiles.end()) {
1089 fail("Duplicate binary file with a short name: " + ShortFileName);
1092 ObjFiles[std::string(ShortFileName)] = FileName;
1093 if (FirstObjFile.empty())
1094 FirstObjFile = FileName;
1098 SmallVector<StringRef, 2> Components;
1100 // Object file => list of corresponding coverage file names.
1101 std::map<std::string, std::vector<std::string>> CoverageByObjFile;
1102 for (const auto &FileName : CovFiles) {
1103 auto ShortFileName = llvm::sys::path::filename(FileName);
1104 auto Ok = SancovFileRegex.match(ShortFileName, &Components);
1105 if (!Ok) {
1106 fail("Can't match coverage file name against "
1107 "<module_name>.<pid>.sancov pattern: " +
1108 FileName);
1111 auto Iter = ObjFiles.find(std::string(Components[1]));
1112 if (Iter == ObjFiles.end()) {
1113 fail("Object file for coverage not found: " + FileName);
1116 CoverageByObjFile[Iter->second].push_back(FileName);
1119 for (const auto &Pair : ObjFiles) {
1120 auto FileName = Pair.second;
1121 if (CoverageByObjFile.find(FileName) == CoverageByObjFile.end())
1122 errs() << "WARNING: No coverage file for " << FileName << "\n";
1125 // Read raw coverage and symbolize it.
1126 for (const auto &Pair : CoverageByObjFile) {
1127 if (findSanitizerCovFunctions(Pair.first).empty()) {
1128 errs()
1129 << "WARNING: Ignoring " << Pair.first
1130 << " and its coverage because __sanitizer_cov* functions were not "
1131 "found.\n";
1132 continue;
1135 for (const std::string &CoverageFile : Pair.second) {
1136 auto DataOrError = RawCoverage::read(CoverageFile);
1137 failIfError(DataOrError);
1138 Coverages.push_back(symbolize(*DataOrError.get(), Pair.first));
1143 return merge(Coverages);
1146 } // namespace
1148 int main(int Argc, char **Argv) {
1149 llvm::InitLLVM X(Argc, Argv);
1150 cl::HideUnrelatedOptions(Cat);
1152 llvm::InitializeAllTargetInfos();
1153 llvm::InitializeAllTargetMCs();
1154 llvm::InitializeAllDisassemblers();
1156 cl::ParseCommandLineOptions(Argc, Argv,
1157 "Sanitizer Coverage Processing Tool (sancov)\n\n"
1158 " This tool can extract various coverage-related information from: \n"
1159 " coverage-instrumented binary files, raw .sancov files and their "
1160 "symbolized .symcov version.\n"
1161 " Depending on chosen action the tool expects different input files:\n"
1162 " -print-coverage-pcs - coverage-instrumented binary files\n"
1163 " -print-coverage - .sancov files\n"
1164 " <other actions> - .sancov files & corresponding binary "
1165 "files, .symcov files\n"
1168 // -print doesn't need object files.
1169 if (Action == PrintAction) {
1170 readAndPrintRawCoverage(ClInputFiles, outs());
1171 return 0;
1172 } else if (Action == PrintCovPointsAction) {
1173 // -print-coverage-points doesn't need coverage files.
1174 for (const std::string &ObjFile : ClInputFiles) {
1175 printCovPoints(ObjFile, outs());
1177 return 0;
1180 auto Coverage = readSymbolizeAndMergeCmdArguments(ClInputFiles);
1181 failIf(!Coverage, "No valid coverage files given.");
1183 switch (Action) {
1184 case CoveredFunctionsAction: {
1185 printCoveredFunctions(*Coverage, outs());
1186 return 0;
1188 case NotCoveredFunctionsAction: {
1189 printNotCoveredFunctions(*Coverage, outs());
1190 return 0;
1192 case StatsAction: {
1193 outs() << computeStats(*Coverage);
1194 return 0;
1196 case MergeAction:
1197 case SymbolizeAction: { // merge & symbolize are synonims.
1198 json::OStream W(outs(), 2);
1199 W << *Coverage;
1200 return 0;
1202 case HtmlReportAction:
1203 errs() << "-html-report option is removed: "
1204 "use -symbolize & coverage-report-server.py instead\n";
1205 return 1;
1206 case PrintAction:
1207 case PrintCovPointsAction:
1208 llvm_unreachable("unsupported action");