[PowerPC] Do not use vectors to codegen bswap with Altivec turned off
[llvm-core.git] / tools / sancov / sancov.cpp
blobe8935d1be21251da146d1a02de444665bb7e4876
1 //===-- sancov.cpp --------------------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // This file is a command-line tool for reading and analyzing sanitizer
10 // coverage.
11 //===----------------------------------------------------------------------===//
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/StringExtras.h"
14 #include "llvm/ADT/Twine.h"
15 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
16 #include "llvm/MC/MCAsmInfo.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstrAnalysis.h"
21 #include "llvm/MC/MCInstrInfo.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/Object/Archive.h"
26 #include "llvm/Object/Binary.h"
27 #include "llvm/Object/COFF.h"
28 #include "llvm/Object/MachO.h"
29 #include "llvm/Object/ObjectFile.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Errc.h"
33 #include "llvm/Support/ErrorOr.h"
34 #include "llvm/Support/FileSystem.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/ManagedStatic.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/Path.h"
39 #include "llvm/Support/PrettyStackTrace.h"
40 #include "llvm/Support/Regex.h"
41 #include "llvm/Support/SHA1.h"
42 #include "llvm/Support/Signals.h"
43 #include "llvm/Support/SourceMgr.h"
44 #include "llvm/Support/SpecialCaseList.h"
45 #include "llvm/Support/TargetRegistry.h"
46 #include "llvm/Support/TargetSelect.h"
47 #include "llvm/Support/YAMLParser.h"
48 #include "llvm/Support/raw_ostream.h"
50 #include <set>
51 #include <vector>
53 using namespace llvm;
55 namespace {
57 // --------- COMMAND LINE FLAGS ---------
// Modes of operation of the tool. The enumerators are bound to their
// command-line spellings by the `Action` option; their order is kept so the
// underlying integer values do not change.
enum ActionType {
  CoveredFunctionsAction,    // "covered-functions"
  HtmlReportAction,          // "html-report"
  MergeAction,               // "merge"
  NotCoveredFunctionsAction, // "not-covered-functions"
  PrintAction,               // "print"
  PrintCovPointsAction,      // "print-coverage-pcs"
  StatsAction,               // "print-coverage-stats"
  SymbolizeAction            // "symbolize"
};
70 cl::opt<ActionType> Action(
71 cl::desc("Action (required)"), cl::Required,
72 cl::values(
73 clEnumValN(PrintAction, "print", "Print coverage addresses"),
74 clEnumValN(PrintCovPointsAction, "print-coverage-pcs",
75 "Print coverage instrumentation points addresses."),
76 clEnumValN(CoveredFunctionsAction, "covered-functions",
77 "Print all covered funcions."),
78 clEnumValN(NotCoveredFunctionsAction, "not-covered-functions",
79 "Print all not covered funcions."),
80 clEnumValN(StatsAction, "print-coverage-stats",
81 "Print coverage statistics."),
82 clEnumValN(HtmlReportAction, "html-report",
83 "REMOVED. Use -symbolize & coverage-report-server.py."),
84 clEnumValN(SymbolizeAction, "symbolize",
85 "Produces a symbolized JSON report from binary report."),
86 clEnumValN(MergeAction, "merge", "Merges reports.")));
88 static cl::list<std::string>
89 ClInputFiles(cl::Positional, cl::OneOrMore,
90 cl::desc("<action> <binary files...> <.sancov files...> "
91 "<.symcov files...>"));
93 static cl::opt<bool> ClDemangle("demangle", cl::init(true),
94 cl::desc("Print demangled function name."));
96 static cl::opt<bool>
97 ClSkipDeadFiles("skip-dead-files", cl::init(true),
98 cl::desc("Do not list dead source files in reports."));
100 static cl::opt<std::string> ClStripPathPrefix(
101 "strip_path_prefix", cl::init(""),
102 cl::desc("Strip this prefix from file paths in reports."));
104 static cl::opt<std::string>
105 ClBlacklist("blacklist", cl::init(""),
106 cl::desc("Blacklist file (sanitizer blacklist format)."));
108 static cl::opt<bool> ClUseDefaultBlacklist(
109 "use_default_blacklist", cl::init(true), cl::Hidden,
110 cl::desc("Controls if default blacklist should be used."));
// Built-in blacklist, one rule per line: functions matching
// `__sanitizer_.*` and sources matching `/usr/include/.*` or
// `.*/libc\+\+/.*`.
static const char *const DefaultBlacklistStr =
    "fun:__sanitizer_.*\n"
    "src:/usr/include/.*\n"
    "src:.*/libc\\+\\+/.*\n";
116 // --------- FORMAT SPECIFICATION ---------
// Leading 8 bytes of a binary coverage file. The struct is overlaid directly
// on the file contents, so the field order and sizes must not change.
struct FileHeader {
  uint32_t Bitness; // compared against Bitness32 / Bitness64
  uint32_t Magic;   // compared against BinCoverageMagic
};
// Expected value of FileHeader::Magic.
static constexpr uint32_t BinCoverageMagic = 0xC0BFFFFF;
// Values of FileHeader::Bitness selecting 32-bit or 64-bit address records.
static constexpr uint32_t Bitness32 = 0xFFFFFF32;
static constexpr uint32_t Bitness64 = 0xFFFFFF64;
127 static Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov");
128 static Regex SymcovFileRegex(".*\\.symcov");
130 // --------- MAIN DATASTRUCTURES ----------
132 // Contents of .sancov file: list of coverage point addresses that were
133 // executed.
134 struct RawCoverage {
135 explicit RawCoverage(std::unique_ptr<std::set<uint64_t>> Addrs)
136 : Addrs(std::move(Addrs)) {}
138 // Read binary .sancov file.
139 static ErrorOr<std::unique_ptr<RawCoverage>>
140 read(const std::string &FileName);
142 std::unique_ptr<std::set<uint64_t>> Addrs;
145 // Coverage point has an opaque Id and corresponds to multiple source locations.
146 struct CoveragePoint {
147 explicit CoveragePoint(const std::string &Id) : Id(Id) {}
149 std::string Id;
150 SmallVector<DILineInfo, 1> Locs;
153 // Symcov file content: set of covered Ids plus information about all available
154 // coverage points.
155 struct SymbolizedCoverage {
156 // Read json .symcov file.
157 static std::unique_ptr<SymbolizedCoverage> read(const std::string &InputFile);
159 std::set<std::string> CoveredIds;
160 std::string BinaryHash;
161 std::vector<CoveragePoint> Points;
// Summary counters printed by the statistics action. Field order matters:
// the struct is filled by aggregate initialization.
struct CoverageStats {
  size_t AllPoints; // number of known coverage points
  size_t CovPoints; // number of covered point ids
  size_t AllFns;    // number of distinct (file, function) pairs
  size_t CovFns;    // number of those pairs with a covered point
};
171 // --------- ERROR HANDLING ---------
173 static void fail(const llvm::Twine &E) {
174 errs() << "ERROR: " << E << "\n";
175 exit(1);
178 static void failIf(bool B, const llvm::Twine &E) {
179 if (B)
180 fail(E);
183 static void failIfError(std::error_code Error) {
184 if (!Error)
185 return;
186 errs() << "ERROR: " << Error.message() << "(" << Error.value() << ")\n";
187 exit(1);
190 template <typename T> static void failIfError(const ErrorOr<T> &E) {
191 failIfError(E.getError());
194 static void failIfError(Error Err) {
195 if (Err) {
196 logAllUnhandledErrors(std::move(Err), errs(), "ERROR: ");
197 exit(1);
201 template <typename T> static void failIfError(Expected<T> &E) {
202 failIfError(E.takeError());
205 static void failIfNotEmpty(const llvm::Twine &E) {
206 if (E.str().empty())
207 return;
208 fail(E);
211 template <typename T>
212 static void failIfEmpty(const std::unique_ptr<T> &Ptr,
213 const std::string &Message) {
214 if (Ptr.get())
215 return;
216 fail(Message);
219 // ----------- Coverage I/O ----------
// Interprets the bytes in [Start, End) as a packed array of T and inserts
// every element into *Ints (widened to uint64_t).
//
// Only whole elements are consumed: if the range length is not a multiple of
// sizeof(T), the trailing bytes are ignored instead of being read past the
// end of the buffer. An empty or inverted range inserts nothing.
template <typename T>
static void readInts(const char *Start, const char *End,
                     std::set<uint64_t> *Ints) {
  if (!Start || End <= Start)
    return;
  const size_t Count = static_cast<size_t>(End - Start) / sizeof(T);
  const T *S = reinterpret_cast<const T *>(Start);
  Ints->insert(S, S + Count);
}
228 ErrorOr<std::unique_ptr<RawCoverage>>
229 RawCoverage::read(const std::string &FileName) {
230 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
231 MemoryBuffer::getFile(FileName);
232 if (!BufOrErr)
233 return BufOrErr.getError();
234 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
235 if (Buf->getBufferSize() < 8) {
236 errs() << "File too small (<8): " << Buf->getBufferSize() << '\n';
237 return make_error_code(errc::illegal_byte_sequence);
239 const FileHeader *Header =
240 reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
242 if (Header->Magic != BinCoverageMagic) {
243 errs() << "Wrong magic: " << Header->Magic << '\n';
244 return make_error_code(errc::illegal_byte_sequence);
247 auto Addrs = llvm::make_unique<std::set<uint64_t>>();
249 switch (Header->Bitness) {
250 case Bitness64:
251 readInts<uint64_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
252 Addrs.get());
253 break;
254 case Bitness32:
255 readInts<uint32_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
256 Addrs.get());
257 break;
258 default:
259 errs() << "Unsupported bitness: " << Header->Bitness << '\n';
260 return make_error_code(errc::illegal_byte_sequence);
263 return std::unique_ptr<RawCoverage>(new RawCoverage(std::move(Addrs)));
266 // Print coverage addresses.
267 raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) {
268 for (auto Addr : *CoverageData.Addrs) {
269 OS << "0x";
270 OS.write_hex(Addr);
271 OS << "\n";
273 return OS;
276 static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) {
277 OS << "all-edges: " << Stats.AllPoints << "\n";
278 OS << "cov-edges: " << Stats.CovPoints << "\n";
279 OS << "all-functions: " << Stats.AllFns << "\n";
280 OS << "cov-functions: " << Stats.CovFns << "\n";
281 return OS;
284 // Helper for writing out JSON. Handles indents and commas using
285 // scope variables for objects and arrays.
286 class JSONWriter {
287 public:
288 JSONWriter(raw_ostream &Out) : OS(Out) {}
289 JSONWriter(const JSONWriter &) = delete;
290 ~JSONWriter() { OS << "\n"; }
292 void operator<<(StringRef S) { printJSONStringLiteral(S, OS); }
294 // Helper RAII class to output JSON objects.
295 class Object {
296 public:
297 Object(JSONWriter *W, raw_ostream &OS) : W(W), OS(OS) {
298 OS << "{";
299 W->Indent++;
301 Object(const Object &) = delete;
302 ~Object() {
303 W->Indent--;
304 OS << "\n";
305 W->indent();
306 OS << "}";
309 void key(StringRef Key) {
310 Index++;
311 if (Index > 0)
312 OS << ",";
313 OS << "\n";
314 W->indent();
315 printJSONStringLiteral(Key, OS);
316 OS << " : ";
319 private:
320 JSONWriter *W;
321 raw_ostream &OS;
322 int Index = -1;
325 std::unique_ptr<Object> object() { return make_unique<Object>(this, OS); }
327 // Helper RAII class to output JSON arrays.
328 class Array {
329 public:
330 Array(raw_ostream &OS) : OS(OS) { OS << "["; }
331 Array(const Array &) = delete;
332 ~Array() { OS << "]"; }
333 void next() {
334 Index++;
335 if (Index > 0)
336 OS << ", ";
339 private:
340 raw_ostream &OS;
341 int Index = -1;
344 std::unique_ptr<Array> array() { return make_unique<Array>(OS); }
346 private:
347 void indent() { OS.indent(Indent * 2); }
349 static void printJSONStringLiteral(StringRef S, raw_ostream &OS) {
350 if (S.find('"') == std::string::npos) {
351 OS << "\"" << S << "\"";
352 return;
354 OS << "\"";
355 for (char Ch : S.bytes()) {
356 if (Ch == '"')
357 OS << "\\";
358 OS << Ch;
360 OS << "\"";
363 raw_ostream &OS;
364 int Indent = 0;
367 // Output symbolized information for coverage points in JSON.
368 // Format:
369 // {
370 // '<file_name>' : {
371 // '<function_name>' : {
372 // '<point_id'> : '<line_number>:'<column_number'.
373 // ....
374 // }
375 // }
376 // }
377 static void operator<<(JSONWriter &W,
378 const std::vector<CoveragePoint> &Points) {
379 // Group points by file.
380 auto ByFile(W.object());
381 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFile;
382 for (const auto &Point : Points) {
383 for (const DILineInfo &Loc : Point.Locs) {
384 PointsByFile[Loc.FileName].push_back(&Point);
388 for (const auto &P : PointsByFile) {
389 std::string FileName = P.first;
390 ByFile->key(FileName);
392 // Group points by function.
393 auto ByFn(W.object());
394 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFn;
395 for (auto PointPtr : P.second) {
396 for (const DILineInfo &Loc : PointPtr->Locs) {
397 PointsByFn[Loc.FunctionName].push_back(PointPtr);
401 for (const auto &P : PointsByFn) {
402 std::string FunctionName = P.first;
403 std::set<std::string> WrittenIds;
405 ByFn->key(FunctionName);
407 // Output <point_id> : "<line>:<col>".
408 auto ById(W.object());
409 for (const CoveragePoint *Point : P.second) {
410 for (const auto &Loc : Point->Locs) {
411 if (Loc.FileName != FileName || Loc.FunctionName != FunctionName)
412 continue;
413 if (WrittenIds.find(Point->Id) != WrittenIds.end())
414 continue;
416 WrittenIds.insert(Point->Id);
417 ById->key(Point->Id);
418 W << (utostr(Loc.Line) + ":" + utostr(Loc.Column));
425 static void operator<<(JSONWriter &W, const SymbolizedCoverage &C) {
426 auto O(W.object());
429 O->key("covered-points");
430 auto PointsArray(W.array());
432 for (const auto &P : C.CoveredIds) {
433 PointsArray->next();
434 W << P;
439 if (!C.BinaryHash.empty()) {
440 O->key("binary-hash");
441 W << C.BinaryHash;
446 O->key("point-symbol-info");
447 W << C.Points;
451 static std::string parseScalarString(yaml::Node *N) {
452 SmallString<64> StringStorage;
453 yaml::ScalarNode *S = dyn_cast<yaml::ScalarNode>(N);
454 failIf(!S, "expected string");
455 return S->getValue(StringStorage);
458 std::unique_ptr<SymbolizedCoverage>
459 SymbolizedCoverage::read(const std::string &InputFile) {
460 auto Coverage(make_unique<SymbolizedCoverage>());
462 std::map<std::string, CoveragePoint> Points;
463 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
464 MemoryBuffer::getFile(InputFile);
465 failIfError(BufOrErr);
467 SourceMgr SM;
468 yaml::Stream S(**BufOrErr, SM);
470 yaml::document_iterator DI = S.begin();
471 failIf(DI == S.end(), "empty document: " + InputFile);
472 yaml::Node *Root = DI->getRoot();
473 failIf(!Root, "expecting root node: " + InputFile);
474 yaml::MappingNode *Top = dyn_cast<yaml::MappingNode>(Root);
475 failIf(!Top, "expecting mapping node: " + InputFile);
477 for (auto &KVNode : *Top) {
478 auto Key = parseScalarString(KVNode.getKey());
480 if (Key == "covered-points") {
481 yaml::SequenceNode *Points =
482 dyn_cast<yaml::SequenceNode>(KVNode.getValue());
483 failIf(!Points, "expected array: " + InputFile);
485 for (auto I = Points->begin(), E = Points->end(); I != E; ++I) {
486 Coverage->CoveredIds.insert(parseScalarString(&*I));
488 } else if (Key == "binary-hash") {
489 Coverage->BinaryHash = parseScalarString(KVNode.getValue());
490 } else if (Key == "point-symbol-info") {
491 yaml::MappingNode *PointSymbolInfo =
492 dyn_cast<yaml::MappingNode>(KVNode.getValue());
493 failIf(!PointSymbolInfo, "expected mapping node: " + InputFile);
495 for (auto &FileKVNode : *PointSymbolInfo) {
496 auto Filename = parseScalarString(FileKVNode.getKey());
498 yaml::MappingNode *FileInfo =
499 dyn_cast<yaml::MappingNode>(FileKVNode.getValue());
500 failIf(!FileInfo, "expected mapping node: " + InputFile);
502 for (auto &FunctionKVNode : *FileInfo) {
503 auto FunctionName = parseScalarString(FunctionKVNode.getKey());
505 yaml::MappingNode *FunctionInfo =
506 dyn_cast<yaml::MappingNode>(FunctionKVNode.getValue());
507 failIf(!FunctionInfo, "expected mapping node: " + InputFile);
509 for (auto &PointKVNode : *FunctionInfo) {
510 auto PointId = parseScalarString(PointKVNode.getKey());
511 auto Loc = parseScalarString(PointKVNode.getValue());
513 size_t ColonPos = Loc.find(':');
514 failIf(ColonPos == std::string::npos, "expected ':': " + InputFile);
516 auto LineStr = Loc.substr(0, ColonPos);
517 auto ColStr = Loc.substr(ColonPos + 1, Loc.size());
519 if (Points.find(PointId) == Points.end())
520 Points.insert(std::make_pair(PointId, CoveragePoint(PointId)));
522 DILineInfo LineInfo;
523 LineInfo.FileName = Filename;
524 LineInfo.FunctionName = FunctionName;
525 char *End;
526 LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10);
527 LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10);
529 CoveragePoint *CoveragePoint = &Points.find(PointId)->second;
530 CoveragePoint->Locs.push_back(LineInfo);
534 } else {
535 errs() << "Ignoring unknown key: " << Key << "\n";
539 for (auto &KV : Points) {
540 Coverage->Points.push_back(KV.second);
543 return Coverage;
546 // ---------- MAIN FUNCTIONALITY ----------
548 std::string stripPathPrefix(std::string Path) {
549 if (ClStripPathPrefix.empty())
550 return Path;
551 size_t Pos = Path.find(ClStripPathPrefix);
552 if (Pos == std::string::npos)
553 return Path;
554 return Path.substr(Pos + ClStripPathPrefix.size());
557 static std::unique_ptr<symbolize::LLVMSymbolizer> createSymbolizer() {
558 symbolize::LLVMSymbolizer::Options SymbolizerOptions;
559 SymbolizerOptions.Demangle = ClDemangle;
560 SymbolizerOptions.UseSymbolTable = true;
561 return std::unique_ptr<symbolize::LLVMSymbolizer>(
562 new symbolize::LLVMSymbolizer(SymbolizerOptions));
565 static std::string normalizeFilename(const std::string &FileName) {
566 SmallString<256> S(FileName);
567 sys::path::remove_dots(S, /* remove_dot_dot */ true);
568 return stripPathPrefix(S.str().str());
571 class Blacklists {
572 public:
573 Blacklists()
574 : DefaultBlacklist(createDefaultBlacklist()),
575 UserBlacklist(createUserBlacklist()) {}
577 bool isBlacklisted(const DILineInfo &I) {
578 if (DefaultBlacklist &&
579 DefaultBlacklist->inSection("sancov", "fun", I.FunctionName))
580 return true;
581 if (DefaultBlacklist &&
582 DefaultBlacklist->inSection("sancov", "src", I.FileName))
583 return true;
584 if (UserBlacklist &&
585 UserBlacklist->inSection("sancov", "fun", I.FunctionName))
586 return true;
587 if (UserBlacklist && UserBlacklist->inSection("sancov", "src", I.FileName))
588 return true;
589 return false;
592 private:
593 static std::unique_ptr<SpecialCaseList> createDefaultBlacklist() {
594 if (!ClUseDefaultBlacklist)
595 return std::unique_ptr<SpecialCaseList>();
596 std::unique_ptr<MemoryBuffer> MB =
597 MemoryBuffer::getMemBuffer(DefaultBlacklistStr);
598 std::string Error;
599 auto Blacklist = SpecialCaseList::create(MB.get(), Error);
600 failIfNotEmpty(Error);
601 return Blacklist;
604 static std::unique_ptr<SpecialCaseList> createUserBlacklist() {
605 if (ClBlacklist.empty())
606 return std::unique_ptr<SpecialCaseList>();
608 return SpecialCaseList::createOrDie({{ClBlacklist}});
610 std::unique_ptr<SpecialCaseList> DefaultBlacklist;
611 std::unique_ptr<SpecialCaseList> UserBlacklist;
614 static std::vector<CoveragePoint>
615 getCoveragePoints(const std::string &ObjectFile,
616 const std::set<uint64_t> &Addrs,
617 const std::set<uint64_t> &CoveredAddrs) {
618 std::vector<CoveragePoint> Result;
619 auto Symbolizer(createSymbolizer());
620 Blacklists B;
622 std::set<std::string> CoveredFiles;
623 if (ClSkipDeadFiles) {
624 for (auto Addr : CoveredAddrs) {
625 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, Addr);
626 failIfError(LineInfo);
627 CoveredFiles.insert(LineInfo->FileName);
628 auto InliningInfo = Symbolizer->symbolizeInlinedCode(ObjectFile, Addr);
629 failIfError(InliningInfo);
630 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
631 auto FrameInfo = InliningInfo->getFrame(I);
632 CoveredFiles.insert(FrameInfo.FileName);
637 for (auto Addr : Addrs) {
638 std::set<DILineInfo> Infos; // deduplicate debug info.
640 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, Addr);
641 failIfError(LineInfo);
642 if (ClSkipDeadFiles &&
643 CoveredFiles.find(LineInfo->FileName) == CoveredFiles.end())
644 continue;
645 LineInfo->FileName = normalizeFilename(LineInfo->FileName);
646 if (B.isBlacklisted(*LineInfo))
647 continue;
649 auto Id = utohexstr(Addr, true);
650 auto Point = CoveragePoint(Id);
651 Infos.insert(*LineInfo);
652 Point.Locs.push_back(*LineInfo);
654 auto InliningInfo = Symbolizer->symbolizeInlinedCode(ObjectFile, Addr);
655 failIfError(InliningInfo);
656 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
657 auto FrameInfo = InliningInfo->getFrame(I);
658 if (ClSkipDeadFiles &&
659 CoveredFiles.find(FrameInfo.FileName) == CoveredFiles.end())
660 continue;
661 FrameInfo.FileName = normalizeFilename(FrameInfo.FileName);
662 if (B.isBlacklisted(FrameInfo))
663 continue;
664 if (Infos.find(FrameInfo) == Infos.end()) {
665 Infos.insert(FrameInfo);
666 Point.Locs.push_back(FrameInfo);
670 Result.push_back(Point);
673 return Result;
676 static bool isCoveragePointSymbol(StringRef Name) {
677 return Name == "__sanitizer_cov" || Name == "__sanitizer_cov_with_check" ||
678 Name == "__sanitizer_cov_trace_func_enter" ||
679 Name == "__sanitizer_cov_trace_pc_guard" ||
680 // Mac has '___' prefix
681 Name == "___sanitizer_cov" || Name == "___sanitizer_cov_with_check" ||
682 Name == "___sanitizer_cov_trace_func_enter" ||
683 Name == "___sanitizer_cov_trace_pc_guard";
686 // Locate __sanitizer_cov* function addresses inside the stubs table on MachO.
687 static void findMachOIndirectCovFunctions(const object::MachOObjectFile &O,
688 std::set<uint64_t> *Result) {
689 MachO::dysymtab_command Dysymtab = O.getDysymtabLoadCommand();
690 MachO::symtab_command Symtab = O.getSymtabLoadCommand();
692 for (const auto &Load : O.load_commands()) {
693 if (Load.C.cmd == MachO::LC_SEGMENT_64) {
694 MachO::segment_command_64 Seg = O.getSegment64LoadCommand(Load);
695 for (unsigned J = 0; J < Seg.nsects; ++J) {
696 MachO::section_64 Sec = O.getSection64(Load, J);
698 uint32_t SectionType = Sec.flags & MachO::SECTION_TYPE;
699 if (SectionType == MachO::S_SYMBOL_STUBS) {
700 uint32_t Stride = Sec.reserved2;
701 uint32_t Cnt = Sec.size / Stride;
702 uint32_t N = Sec.reserved1;
703 for (uint32_t J = 0; J < Cnt && N + J < Dysymtab.nindirectsyms; J++) {
704 uint32_t IndirectSymbol =
705 O.getIndirectSymbolTableEntry(Dysymtab, N + J);
706 uint64_t Addr = Sec.addr + J * Stride;
707 if (IndirectSymbol < Symtab.nsyms) {
708 object::SymbolRef Symbol = *(O.getSymbolByIndex(IndirectSymbol));
709 Expected<StringRef> Name = Symbol.getName();
710 failIfError(Name);
711 if (isCoveragePointSymbol(Name.get())) {
712 Result->insert(Addr);
719 if (Load.C.cmd == MachO::LC_SEGMENT) {
720 errs() << "ERROR: 32 bit MachO binaries not supported\n";
725 // Locate __sanitizer_cov* function addresses that are used for coverage
726 // reporting.
727 static std::set<uint64_t>
728 findSanitizerCovFunctions(const object::ObjectFile &O) {
729 std::set<uint64_t> Result;
731 for (const object::SymbolRef &Symbol : O.symbols()) {
732 Expected<uint64_t> AddressOrErr = Symbol.getAddress();
733 failIfError(AddressOrErr);
734 uint64_t Address = AddressOrErr.get();
736 Expected<StringRef> NameOrErr = Symbol.getName();
737 failIfError(NameOrErr);
738 StringRef Name = NameOrErr.get();
740 if (!(Symbol.getFlags() & object::BasicSymbolRef::SF_Undefined) &&
741 isCoveragePointSymbol(Name)) {
742 Result.insert(Address);
746 if (const auto *CO = dyn_cast<object::COFFObjectFile>(&O)) {
747 for (const object::ExportDirectoryEntryRef &Export :
748 CO->export_directories()) {
749 uint32_t RVA;
750 std::error_code EC = Export.getExportRVA(RVA);
751 failIfError(EC);
753 StringRef Name;
754 EC = Export.getSymbolName(Name);
755 failIfError(EC);
757 if (isCoveragePointSymbol(Name))
758 Result.insert(CO->getImageBase() + RVA);
762 if (const auto *MO = dyn_cast<object::MachOObjectFile>(&O)) {
763 findMachOIndirectCovFunctions(*MO, &Result);
766 return Result;
769 static uint64_t getPreviousInstructionPc(uint64_t PC,
770 Triple TheTriple) {
771 if (TheTriple.isARM()) {
772 return (PC - 3) & (~1);
773 } else if (TheTriple.isAArch64()) {
774 return PC - 4;
775 } else if (TheTriple.isMIPS()) {
776 return PC - 8;
777 } else {
778 return PC - 1;
782 // Locate addresses of all coverage points in a file. Coverage point
783 // is defined as the 'address of instruction following __sanitizer_cov
784 // call - 1'.
785 static void getObjectCoveragePoints(const object::ObjectFile &O,
786 std::set<uint64_t> *Addrs) {
787 Triple TheTriple("unknown-unknown-unknown");
788 TheTriple.setArch(Triple::ArchType(O.getArch()));
789 auto TripleName = TheTriple.getTriple();
791 std::string Error;
792 const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
793 failIfNotEmpty(Error);
795 std::unique_ptr<const MCSubtargetInfo> STI(
796 TheTarget->createMCSubtargetInfo(TripleName, "", ""));
797 failIfEmpty(STI, "no subtarget info for target " + TripleName);
799 std::unique_ptr<const MCRegisterInfo> MRI(
800 TheTarget->createMCRegInfo(TripleName));
801 failIfEmpty(MRI, "no register info for target " + TripleName);
803 std::unique_ptr<const MCAsmInfo> AsmInfo(
804 TheTarget->createMCAsmInfo(*MRI, TripleName));
805 failIfEmpty(AsmInfo, "no asm info for target " + TripleName);
807 std::unique_ptr<const MCObjectFileInfo> MOFI(new MCObjectFileInfo);
808 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
809 std::unique_ptr<MCDisassembler> DisAsm(
810 TheTarget->createMCDisassembler(*STI, Ctx));
811 failIfEmpty(DisAsm, "no disassembler info for target " + TripleName);
813 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
814 failIfEmpty(MII, "no instruction info for target " + TripleName);
816 std::unique_ptr<const MCInstrAnalysis> MIA(
817 TheTarget->createMCInstrAnalysis(MII.get()));
818 failIfEmpty(MIA, "no instruction analysis info for target " + TripleName);
820 auto SanCovAddrs = findSanitizerCovFunctions(O);
821 if (SanCovAddrs.empty())
822 fail("__sanitizer_cov* functions not found");
824 for (object::SectionRef Section : O.sections()) {
825 if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same.
826 continue;
827 uint64_t SectionAddr = Section.getAddress();
828 uint64_t SectSize = Section.getSize();
829 if (!SectSize)
830 continue;
832 StringRef BytesStr;
833 failIfError(Section.getContents(BytesStr));
834 ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
835 BytesStr.size());
837 for (uint64_t Index = 0, Size = 0; Index < Section.getSize();
838 Index += Size) {
839 MCInst Inst;
840 if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
841 SectionAddr + Index, nulls(), nulls())) {
842 if (Size == 0)
843 Size = 1;
844 continue;
846 uint64_t Addr = Index + SectionAddr;
847 // Sanitizer coverage uses the address of the next instruction - 1.
848 uint64_t CovPoint = getPreviousInstructionPc(Addr + Size, TheTriple);
849 uint64_t Target;
850 if (MIA->isCall(Inst) &&
851 MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) &&
852 SanCovAddrs.find(Target) != SanCovAddrs.end())
853 Addrs->insert(CovPoint);
858 static void
859 visitObjectFiles(const object::Archive &A,
860 function_ref<void(const object::ObjectFile &)> Fn) {
861 Error Err = Error::success();
862 for (auto &C : A.children(Err)) {
863 Expected<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary();
864 failIfError(ChildOrErr);
865 if (auto *O = dyn_cast<object::ObjectFile>(&*ChildOrErr.get()))
866 Fn(*O);
867 else
868 failIfError(object::object_error::invalid_file_type);
870 failIfError(std::move(Err));
873 static void
874 visitObjectFiles(const std::string &FileName,
875 function_ref<void(const object::ObjectFile &)> Fn) {
876 Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
877 object::createBinary(FileName);
878 if (!BinaryOrErr)
879 failIfError(BinaryOrErr);
881 object::Binary &Binary = *BinaryOrErr.get().getBinary();
882 if (object::Archive *A = dyn_cast<object::Archive>(&Binary))
883 visitObjectFiles(*A, Fn);
884 else if (object::ObjectFile *O = dyn_cast<object::ObjectFile>(&Binary))
885 Fn(*O);
886 else
887 failIfError(object::object_error::invalid_file_type);
890 static std::set<uint64_t>
891 findSanitizerCovFunctions(const std::string &FileName) {
892 std::set<uint64_t> Result;
893 visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
894 auto Addrs = findSanitizerCovFunctions(O);
895 Result.insert(Addrs.begin(), Addrs.end());
897 return Result;
900 // Locate addresses of all coverage points in a file. Coverage point
901 // is defined as the 'address of instruction following __sanitizer_cov
902 // call - 1'.
903 static std::set<uint64_t> findCoveragePointAddrs(const std::string &FileName) {
904 std::set<uint64_t> Result;
905 visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
906 getObjectCoveragePoints(O, &Result);
908 return Result;
911 static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) {
912 for (uint64_t Addr : findCoveragePointAddrs(ObjFile)) {
913 OS << "0x";
914 OS.write_hex(Addr);
915 OS << "\n";
919 static ErrorOr<bool> isCoverageFile(const std::string &FileName) {
920 auto ShortFileName = llvm::sys::path::filename(FileName);
921 if (!SancovFileRegex.match(ShortFileName))
922 return false;
924 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
925 MemoryBuffer::getFile(FileName);
926 if (!BufOrErr) {
927 errs() << "Warning: " << BufOrErr.getError().message() << "("
928 << BufOrErr.getError().value()
929 << "), filename: " << llvm::sys::path::filename(FileName) << "\n";
930 return BufOrErr.getError();
932 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
933 if (Buf->getBufferSize() < 8) {
934 return false;
936 const FileHeader *Header =
937 reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
938 return Header->Magic == BinCoverageMagic;
941 static bool isSymbolizedCoverageFile(const std::string &FileName) {
942 auto ShortFileName = llvm::sys::path::filename(FileName);
943 return SymcovFileRegex.match(ShortFileName);
946 static std::unique_ptr<SymbolizedCoverage>
947 symbolize(const RawCoverage &Data, const std::string ObjectFile) {
948 auto Coverage = make_unique<SymbolizedCoverage>();
950 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
951 MemoryBuffer::getFile(ObjectFile);
952 failIfError(BufOrErr);
953 SHA1 Hasher;
954 Hasher.update((*BufOrErr)->getBuffer());
955 Coverage->BinaryHash = toHex(Hasher.final());
957 Blacklists B;
958 auto Symbolizer(createSymbolizer());
960 for (uint64_t Addr : *Data.Addrs) {
961 auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, Addr);
962 failIfError(LineInfo);
963 if (B.isBlacklisted(*LineInfo))
964 continue;
966 Coverage->CoveredIds.insert(utohexstr(Addr, true));
969 std::set<uint64_t> AllAddrs = findCoveragePointAddrs(ObjectFile);
970 if (!std::includes(AllAddrs.begin(), AllAddrs.end(), Data.Addrs->begin(),
971 Data.Addrs->end())) {
972 fail("Coverage points in binary and .sancov file do not match.");
974 Coverage->Points = getCoveragePoints(ObjectFile, AllAddrs, *Data.Addrs);
975 return Coverage;
// Identifies a function by (file name, function name). Ordered by file name
// first and function name second so it can be used as a set/map key.
struct FileFn {
  bool operator<(const FileFn &RHS) const {
    if (FileName != RHS.FileName)
      return FileName < RHS.FileName;
    return FunctionName < RHS.FunctionName;
  }

  std::string FileName;
  std::string FunctionName;
};
988 static std::set<FileFn>
989 computeFunctions(const std::vector<CoveragePoint> &Points) {
990 std::set<FileFn> Fns;
991 for (const auto &Point : Points) {
992 for (const auto &Loc : Point.Locs) {
993 Fns.insert(FileFn{Loc.FileName, Loc.FunctionName});
996 return Fns;
999 static std::set<FileFn>
1000 computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) {
1001 auto Fns = computeFunctions(Coverage.Points);
1003 for (const auto &Point : Coverage.Points) {
1004 if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
1005 continue;
1007 for (const auto &Loc : Point.Locs) {
1008 Fns.erase(FileFn{Loc.FileName, Loc.FunctionName});
1012 return Fns;
1015 static std::set<FileFn>
1016 computeCoveredFunctions(const SymbolizedCoverage &Coverage) {
1017 auto AllFns = computeFunctions(Coverage.Points);
1018 std::set<FileFn> Result;
1020 for (const auto &Point : Coverage.Points) {
1021 if (Coverage.CoveredIds.find(Point.Id) == Coverage.CoveredIds.end())
1022 continue;
1024 for (const auto &Loc : Point.Locs) {
1025 Result.insert(FileFn{Loc.FileName, Loc.FunctionName});
1029 return Result;
1032 typedef std::map<FileFn, std::pair<uint32_t, uint32_t>> FunctionLocs;
1033 // finds first location in a file for each function.
1034 static FunctionLocs resolveFunctions(const SymbolizedCoverage &Coverage,
1035 const std::set<FileFn> &Fns) {
1036 FunctionLocs Result;
1037 for (const auto &Point : Coverage.Points) {
1038 for (const auto &Loc : Point.Locs) {
1039 FileFn Fn = FileFn{Loc.FileName, Loc.FunctionName};
1040 if (Fns.find(Fn) == Fns.end())
1041 continue;
1043 auto P = std::make_pair(Loc.Line, Loc.Column);
1044 auto I = Result.find(Fn);
1045 if (I == Result.end() || I->second > P) {
1046 Result[Fn] = P;
1050 return Result;
1053 static void printFunctionLocs(const FunctionLocs &FnLocs, raw_ostream &OS) {
1054 for (const auto &P : FnLocs) {
1055 OS << stripPathPrefix(P.first.FileName) << ":" << P.second.first << " "
1056 << P.first.FunctionName << "\n";
1059 CoverageStats computeStats(const SymbolizedCoverage &Coverage) {
1060 CoverageStats Stats = {Coverage.Points.size(), Coverage.CoveredIds.size(),
1061 computeFunctions(Coverage.Points).size(),
1062 computeCoveredFunctions(Coverage).size()};
1063 return Stats;
1066 // Print list of covered functions.
1067 // Line format: <file_name>:<line> <function_name>
1068 static void printCoveredFunctions(const SymbolizedCoverage &CovData,
1069 raw_ostream &OS) {
1070 auto CoveredFns = computeCoveredFunctions(CovData);
1071 printFunctionLocs(resolveFunctions(CovData, CoveredFns), OS);
1074 // Print list of not covered functions.
1075 // Line format: <file_name>:<line> <function_name>
1076 static void printNotCoveredFunctions(const SymbolizedCoverage &CovData,
1077 raw_ostream &OS) {
1078 auto NotCoveredFns = computeNotCoveredFunctions(CovData);
1079 printFunctionLocs(resolveFunctions(CovData, NotCoveredFns), OS);
1082 // Read list of files and merges their coverage info.
1083 static void readAndPrintRawCoverage(const std::vector<std::string> &FileNames,
1084 raw_ostream &OS) {
1085 std::vector<std::unique_ptr<RawCoverage>> Covs;
1086 for (const auto &FileName : FileNames) {
1087 auto Cov = RawCoverage::read(FileName);
1088 if (!Cov)
1089 continue;
1090 OS << *Cov.get();
1094 static std::unique_ptr<SymbolizedCoverage>
1095 merge(const std::vector<std::unique_ptr<SymbolizedCoverage>> &Coverages) {
1096 if (Coverages.empty())
1097 return nullptr;
1099 auto Result = make_unique<SymbolizedCoverage>();
1101 for (size_t I = 0; I < Coverages.size(); ++I) {
1102 const SymbolizedCoverage &Coverage = *Coverages[I];
1103 std::string Prefix;
1104 if (Coverages.size() > 1) {
1105 // prefix is not needed when there's only one file.
1106 Prefix = utostr(I);
1109 for (const auto &Id : Coverage.CoveredIds) {
1110 Result->CoveredIds.insert(Prefix + Id);
1113 for (const auto &CovPoint : Coverage.Points) {
1114 CoveragePoint NewPoint(CovPoint);
1115 NewPoint.Id = Prefix + CovPoint.Id;
1116 Result->Points.push_back(NewPoint);
1120 if (Coverages.size() == 1) {
1121 Result->BinaryHash = Coverages[0]->BinaryHash;
1124 return Result;
1127 static std::unique_ptr<SymbolizedCoverage>
1128 readSymbolizeAndMergeCmdArguments(std::vector<std::string> FileNames) {
1129 std::vector<std::unique_ptr<SymbolizedCoverage>> Coverages;
1132 // Short name => file name.
1133 std::map<std::string, std::string> ObjFiles;
1134 std::string FirstObjFile;
1135 std::set<std::string> CovFiles;
1137 // Partition input values into coverage/object files.
1138 for (const auto &FileName : FileNames) {
1139 if (isSymbolizedCoverageFile(FileName)) {
1140 Coverages.push_back(SymbolizedCoverage::read(FileName));
1143 auto ErrorOrIsCoverage = isCoverageFile(FileName);
1144 if (!ErrorOrIsCoverage)
1145 continue;
1146 if (ErrorOrIsCoverage.get()) {
1147 CovFiles.insert(FileName);
1148 } else {
1149 auto ShortFileName = llvm::sys::path::filename(FileName);
1150 if (ObjFiles.find(ShortFileName) != ObjFiles.end()) {
1151 fail("Duplicate binary file with a short name: " + ShortFileName);
1154 ObjFiles[ShortFileName] = FileName;
1155 if (FirstObjFile.empty())
1156 FirstObjFile = FileName;
1160 SmallVector<StringRef, 2> Components;
1162 // Object file => list of corresponding coverage file names.
1163 std::map<std::string, std::vector<std::string>> CoverageByObjFile;
1164 for (const auto &FileName : CovFiles) {
1165 auto ShortFileName = llvm::sys::path::filename(FileName);
1166 auto Ok = SancovFileRegex.match(ShortFileName, &Components);
1167 if (!Ok) {
1168 fail("Can't match coverage file name against "
1169 "<module_name>.<pid>.sancov pattern: " +
1170 FileName);
1173 auto Iter = ObjFiles.find(Components[1]);
1174 if (Iter == ObjFiles.end()) {
1175 fail("Object file for coverage not found: " + FileName);
1178 CoverageByObjFile[Iter->second].push_back(FileName);
1181 for (const auto &Pair : ObjFiles) {
1182 auto FileName = Pair.second;
1183 if (CoverageByObjFile.find(FileName) == CoverageByObjFile.end())
1184 errs() << "WARNING: No coverage file for " << FileName << "\n";
1187 // Read raw coverage and symbolize it.
1188 for (const auto &Pair : CoverageByObjFile) {
1189 if (findSanitizerCovFunctions(Pair.first).empty()) {
1190 errs()
1191 << "WARNING: Ignoring " << Pair.first
1192 << " and its coverage because __sanitizer_cov* functions were not "
1193 "found.\n";
1194 continue;
1197 for (const std::string &CoverageFile : Pair.second) {
1198 auto DataOrError = RawCoverage::read(CoverageFile);
1199 failIfError(DataOrError);
1200 Coverages.push_back(symbolize(*DataOrError.get(), Pair.first));
1205 return merge(Coverages);
1208 } // namespace
1210 int main(int Argc, char **Argv) {
1211 // Print stack trace if we signal out.
1212 sys::PrintStackTraceOnErrorSignal(Argv[0]);
1213 PrettyStackTraceProgram X(Argc, Argv);
1214 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
1216 llvm::InitializeAllTargetInfos();
1217 llvm::InitializeAllTargetMCs();
1218 llvm::InitializeAllDisassemblers();
1220 cl::ParseCommandLineOptions(Argc, Argv,
1221 "Sanitizer Coverage Processing Tool (sancov)\n\n"
1222 " This tool can extract various coverage-related information from: \n"
1223 " coverage-instrumented binary files, raw .sancov files and their "
1224 "symbolized .symcov version.\n"
1225 " Depending on chosen action the tool expects different input files:\n"
1226 " -print-coverage-pcs - coverage-instrumented binary files\n"
1227 " -print-coverage - .sancov files\n"
1228 " <other actions> - .sancov files & corresponding binary "
1229 "files, .symcov files\n"
1232 // -print doesn't need object files.
1233 if (Action == PrintAction) {
1234 readAndPrintRawCoverage(ClInputFiles, outs());
1235 return 0;
1236 } else if (Action == PrintCovPointsAction) {
1237 // -print-coverage-points doesn't need coverage files.
1238 for (const std::string &ObjFile : ClInputFiles) {
1239 printCovPoints(ObjFile, outs());
1241 return 0;
1244 auto Coverage = readSymbolizeAndMergeCmdArguments(ClInputFiles);
1245 failIf(!Coverage, "No valid coverage files given.");
1247 switch (Action) {
1248 case CoveredFunctionsAction: {
1249 printCoveredFunctions(*Coverage, outs());
1250 return 0;
1252 case NotCoveredFunctionsAction: {
1253 printNotCoveredFunctions(*Coverage, outs());
1254 return 0;
1256 case StatsAction: {
1257 outs() << computeStats(*Coverage);
1258 return 0;
1260 case MergeAction:
1261 case SymbolizeAction: { // merge & symbolize are synonims.
1262 JSONWriter W(outs());
1263 W << *Coverage;
1264 return 0;
1266 case HtmlReportAction:
1267 errs() << "-html-report option is removed: "
1268 "use -symbolize & coverage-report-server.py instead\n";
1269 return 1;
1270 case PrintAction:
1271 case PrintCovPointsAction:
1272 llvm_unreachable("unsupported action");