1 //===-- sancov.cpp --------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // This file is a command-line tool for reading and analyzing sanitizer
10 //===----------------------------------------------------------------------===//
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringExtras.h"
13 #include "llvm/ADT/Twine.h"
14 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
15 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
16 #include "llvm/MC/MCAsmInfo.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstrAnalysis.h"
21 #include "llvm/MC/MCInstrInfo.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCTargetOptions.h"
26 #include "llvm/MC/TargetRegistry.h"
27 #include "llvm/Object/Archive.h"
28 #include "llvm/Object/Binary.h"
29 #include "llvm/Object/COFF.h"
30 #include "llvm/Object/MachO.h"
31 #include "llvm/Object/ObjectFile.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Errc.h"
35 #include "llvm/Support/ErrorOr.h"
36 #include "llvm/Support/FileSystem.h"
37 #include "llvm/Support/InitLLVM.h"
38 #include "llvm/Support/JSON.h"
39 #include "llvm/Support/MD5.h"
40 #include "llvm/Support/MemoryBuffer.h"
41 #include "llvm/Support/Path.h"
42 #include "llvm/Support/Regex.h"
43 #include "llvm/Support/SHA1.h"
44 #include "llvm/Support/SourceMgr.h"
45 #include "llvm/Support/SpecialCaseList.h"
46 #include "llvm/Support/TargetSelect.h"
47 #include "llvm/Support/VirtualFileSystem.h"
48 #include "llvm/Support/YAMLParser.h"
49 #include "llvm/Support/raw_ostream.h"
58 // --------- COMMAND LINE FLAGS ---------
60 cl::OptionCategory
Cat("sancov Options");
63 CoveredFunctionsAction
,
66 NotCoveredFunctionsAction
,
73 cl::opt
<ActionType
> Action(
74 cl::desc("Action (required)"), cl::Required
,
76 clEnumValN(PrintAction
, "print", "Print coverage addresses"),
77 clEnumValN(PrintCovPointsAction
, "print-coverage-pcs",
78 "Print coverage instrumentation points addresses."),
79 clEnumValN(CoveredFunctionsAction
, "covered-functions",
80 "Print all covered funcions."),
81 clEnumValN(NotCoveredFunctionsAction
, "not-covered-functions",
82 "Print all not covered funcions."),
83 clEnumValN(StatsAction
, "print-coverage-stats",
84 "Print coverage statistics."),
85 clEnumValN(HtmlReportAction
, "html-report",
86 "REMOVED. Use -symbolize & coverage-report-server.py."),
87 clEnumValN(SymbolizeAction
, "symbolize",
88 "Produces a symbolized JSON report from binary report."),
89 clEnumValN(MergeAction
, "merge", "Merges reports.")),
92 static cl::list
<std::string
>
93 ClInputFiles(cl::Positional
, cl::OneOrMore
,
94 cl::desc("<action> <binary files...> <.sancov files...> "
95 "<.symcov files...>"),
98 static cl::opt
<bool> ClDemangle("demangle", cl::init(true),
99 cl::desc("Print demangled function name"),
103 ClSkipDeadFiles("skip-dead-files", cl::init(true),
104 cl::desc("Do not list dead source files in reports"),
107 static cl::opt
<std::string
>
108 ClStripPathPrefix("strip_path_prefix", cl::init(""),
109 cl::desc("Strip this prefix from file paths in reports"),
112 static cl::opt
<std::string
>
113 ClIgnorelist("ignorelist", cl::init(""),
114 cl::desc("Ignorelist file (sanitizer ignorelist format)"),
117 static cl::opt
<bool> ClUseDefaultIgnorelist(
118 "use_default_ignorelist", cl::init(true), cl::Hidden
,
119 cl::desc("Controls if default ignorelist should be used"), cl::cat(Cat
));
// Built-in ignorelist text: one "fun:" pattern for __sanitizer_* functions and
// two "src:" patterns for /usr/include and libc++ sources.
static const char *const DefaultIgnorelistStr =
    "fun:__sanitizer_.*\n"
    "src:/usr/include/.*\n"
    "src:.*/libc\\+\\+/.*\n";
125 // --------- FORMAT SPECIFICATION ---------
// Value compared against the Magic field of a .sancov file header.
static constexpr uint32_t BinCoverageMagic = 0xC0BFFFFF;
// Values of the header's Bitness field.
static constexpr uint32_t Bitness32 = 0xFFFFFF32;
static constexpr uint32_t Bitness64 = 0xFFFFFF64;
136 static const Regex
SancovFileRegex("(.*)\\.[0-9]+\\.sancov");
137 static const Regex
SymcovFileRegex(".*\\.symcov");
139 // --------- MAIN DATASTRUCTURES ----------
141 // Contents of .sancov file: list of coverage point addresses that were
144 explicit RawCoverage(std::unique_ptr
<std::set
<uint64_t>> Addrs
)
145 : Addrs(std::move(Addrs
)) {}
147 // Read binary .sancov file.
148 static ErrorOr
<std::unique_ptr
<RawCoverage
>>
149 read(const std::string
&FileName
);
151 std::unique_ptr
<std::set
<uint64_t>> Addrs
;
154 // Coverage point has an opaque Id and corresponds to multiple source locations.
155 struct CoveragePoint
{
156 explicit CoveragePoint(const std::string
&Id
) : Id(Id
) {}
159 SmallVector
<DILineInfo
, 1> Locs
;
162 // Symcov file content: set of covered Ids plus information about all available
164 struct SymbolizedCoverage
{
165 // Read json .symcov file.
166 static std::unique_ptr
<SymbolizedCoverage
> read(const std::string
&InputFile
);
168 std::set
<std::string
> CoveredIds
;
169 std::string BinaryHash
;
170 std::vector
<CoveragePoint
> Points
;
173 struct CoverageStats
{
180 // --------- ERROR HANDLING ---------
182 static void fail(const llvm::Twine
&E
) {
183 errs() << "ERROR: " << E
<< "\n";
187 static void failIf(bool B
, const llvm::Twine
&E
) {
192 static void failIfError(std::error_code Error
) {
195 errs() << "ERROR: " << Error
.message() << "(" << Error
.value() << ")\n";
199 template <typename T
> static void failIfError(const ErrorOr
<T
> &E
) {
200 failIfError(E
.getError());
203 static void failIfError(Error Err
) {
205 logAllUnhandledErrors(std::move(Err
), errs(), "ERROR: ");
210 template <typename T
> static void failIfError(Expected
<T
> &E
) {
211 failIfError(E
.takeError());
214 static void failIfNotEmpty(const llvm::Twine
&E
) {
220 template <typename T
>
221 static void failIfEmpty(const std::unique_ptr
<T
> &Ptr
,
222 const std::string
&Message
) {
228 // ----------- Coverage I/O ----------
// Views the bytes in [Start, End) as a packed array of T and adds every
// element, widened to uint64_t, to *Ints. Duplicate values collapse because
// the destination is a set.
template <typename T>
static void readInts(const char *Start, const char *End,
                     std::set<uint64_t> *Ints) {
  const T *First = reinterpret_cast<const T *>(Start);
  const T *Last = reinterpret_cast<const T *>(End);
  Ints->insert(First, Last);
}
237 ErrorOr
<std::unique_ptr
<RawCoverage
>>
238 RawCoverage::read(const std::string
&FileName
) {
239 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> BufOrErr
=
240 MemoryBuffer::getFile(FileName
);
242 return BufOrErr
.getError();
243 std::unique_ptr
<MemoryBuffer
> Buf
= std::move(BufOrErr
.get());
244 if (Buf
->getBufferSize() < 8) {
245 errs() << "File too small (<8): " << Buf
->getBufferSize() << '\n';
246 return make_error_code(errc::illegal_byte_sequence
);
248 const FileHeader
*Header
=
249 reinterpret_cast<const FileHeader
*>(Buf
->getBufferStart());
251 if (Header
->Magic
!= BinCoverageMagic
) {
252 errs() << "Wrong magic: " << Header
->Magic
<< '\n';
253 return make_error_code(errc::illegal_byte_sequence
);
256 auto Addrs
= std::make_unique
<std::set
<uint64_t>>();
258 switch (Header
->Bitness
) {
260 readInts
<uint64_t>(Buf
->getBufferStart() + 8, Buf
->getBufferEnd(),
264 readInts
<uint32_t>(Buf
->getBufferStart() + 8, Buf
->getBufferEnd(),
268 errs() << "Unsupported bitness: " << Header
->Bitness
<< '\n';
269 return make_error_code(errc::illegal_byte_sequence
);
272 // Ignore slots that are zero, so a runtime implementation is not required
273 // to compactify the data.
276 return std::unique_ptr
<RawCoverage
>(new RawCoverage(std::move(Addrs
)));
279 // Print coverage addresses.
280 raw_ostream
&operator<<(raw_ostream
&OS
, const RawCoverage
&CoverageData
) {
281 for (auto Addr
: *CoverageData
.Addrs
) {
289 static raw_ostream
&operator<<(raw_ostream
&OS
, const CoverageStats
&Stats
) {
290 OS
<< "all-edges: " << Stats
.AllPoints
<< "\n";
291 OS
<< "cov-edges: " << Stats
.CovPoints
<< "\n";
292 OS
<< "all-functions: " << Stats
.AllFns
<< "\n";
293 OS
<< "cov-functions: " << Stats
.CovFns
<< "\n";
297 // Output symbolized information for coverage points in JSON.
301 // '<function_name>' : {
//       '<point_id>' : '<line_number>:<column_number>'.
307 static void operator<<(json::OStream
&W
,
308 const std::vector
<CoveragePoint
> &Points
) {
309 // Group points by file.
310 std::map
<std::string
, std::vector
<const CoveragePoint
*>> PointsByFile
;
311 for (const auto &Point
: Points
) {
312 for (const DILineInfo
&Loc
: Point
.Locs
) {
313 PointsByFile
[Loc
.FileName
].push_back(&Point
);
317 for (const auto &P
: PointsByFile
) {
318 std::string FileName
= P
.first
;
319 std::map
<std::string
, std::vector
<const CoveragePoint
*>> PointsByFn
;
320 for (auto PointPtr
: P
.second
) {
321 for (const DILineInfo
&Loc
: PointPtr
->Locs
) {
322 PointsByFn
[Loc
.FunctionName
].push_back(PointPtr
);
326 W
.attributeObject(P
.first
, [&] {
327 // Group points by function.
328 for (const auto &P
: PointsByFn
) {
329 std::string FunctionName
= P
.first
;
330 std::set
<std::string
> WrittenIds
;
332 W
.attributeObject(FunctionName
, [&] {
333 for (const CoveragePoint
*Point
: P
.second
) {
334 for (const auto &Loc
: Point
->Locs
) {
335 if (Loc
.FileName
!= FileName
|| Loc
.FunctionName
!= FunctionName
)
337 if (WrittenIds
.find(Point
->Id
) != WrittenIds
.end())
340 // Output <point_id> : "<line>:<col>".
341 WrittenIds
.insert(Point
->Id
);
342 W
.attribute(Point
->Id
,
343 (utostr(Loc
.Line
) + ":" + utostr(Loc
.Column
)));
352 static void operator<<(json::OStream
&W
, const SymbolizedCoverage
&C
) {
354 W
.attributeArray("covered-points", [&] {
355 for (const std::string
&P
: C
.CoveredIds
) {
359 W
.attribute("binary-hash", C
.BinaryHash
);
360 W
.attributeObject("point-symbol-info", [&] { W
<< C
.Points
; });
364 static std::string
parseScalarString(yaml::Node
*N
) {
365 SmallString
<64> StringStorage
;
366 yaml::ScalarNode
*S
= dyn_cast
<yaml::ScalarNode
>(N
);
367 failIf(!S
, "expected string");
368 return std::string(S
->getValue(StringStorage
));
371 std::unique_ptr
<SymbolizedCoverage
>
372 SymbolizedCoverage::read(const std::string
&InputFile
) {
373 auto Coverage(std::make_unique
<SymbolizedCoverage
>());
375 std::map
<std::string
, CoveragePoint
> Points
;
376 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> BufOrErr
=
377 MemoryBuffer::getFile(InputFile
);
378 failIfError(BufOrErr
);
381 yaml::Stream
S(**BufOrErr
, SM
);
383 yaml::document_iterator DI
= S
.begin();
384 failIf(DI
== S
.end(), "empty document: " + InputFile
);
385 yaml::Node
*Root
= DI
->getRoot();
386 failIf(!Root
, "expecting root node: " + InputFile
);
387 yaml::MappingNode
*Top
= dyn_cast
<yaml::MappingNode
>(Root
);
388 failIf(!Top
, "expecting mapping node: " + InputFile
);
390 for (auto &KVNode
: *Top
) {
391 auto Key
= parseScalarString(KVNode
.getKey());
393 if (Key
== "covered-points") {
394 yaml::SequenceNode
*Points
=
395 dyn_cast
<yaml::SequenceNode
>(KVNode
.getValue());
396 failIf(!Points
, "expected array: " + InputFile
);
398 for (auto I
= Points
->begin(), E
= Points
->end(); I
!= E
; ++I
) {
399 Coverage
->CoveredIds
.insert(parseScalarString(&*I
));
401 } else if (Key
== "binary-hash") {
402 Coverage
->BinaryHash
= parseScalarString(KVNode
.getValue());
403 } else if (Key
== "point-symbol-info") {
404 yaml::MappingNode
*PointSymbolInfo
=
405 dyn_cast
<yaml::MappingNode
>(KVNode
.getValue());
406 failIf(!PointSymbolInfo
, "expected mapping node: " + InputFile
);
408 for (auto &FileKVNode
: *PointSymbolInfo
) {
409 auto Filename
= parseScalarString(FileKVNode
.getKey());
411 yaml::MappingNode
*FileInfo
=
412 dyn_cast
<yaml::MappingNode
>(FileKVNode
.getValue());
413 failIf(!FileInfo
, "expected mapping node: " + InputFile
);
415 for (auto &FunctionKVNode
: *FileInfo
) {
416 auto FunctionName
= parseScalarString(FunctionKVNode
.getKey());
418 yaml::MappingNode
*FunctionInfo
=
419 dyn_cast
<yaml::MappingNode
>(FunctionKVNode
.getValue());
420 failIf(!FunctionInfo
, "expected mapping node: " + InputFile
);
422 for (auto &PointKVNode
: *FunctionInfo
) {
423 auto PointId
= parseScalarString(PointKVNode
.getKey());
424 auto Loc
= parseScalarString(PointKVNode
.getValue());
426 size_t ColonPos
= Loc
.find(':');
427 failIf(ColonPos
== std::string::npos
, "expected ':': " + InputFile
);
429 auto LineStr
= Loc
.substr(0, ColonPos
);
430 auto ColStr
= Loc
.substr(ColonPos
+ 1, Loc
.size());
432 if (Points
.find(PointId
) == Points
.end())
433 Points
.insert(std::make_pair(PointId
, CoveragePoint(PointId
)));
436 LineInfo
.FileName
= Filename
;
437 LineInfo
.FunctionName
= FunctionName
;
439 LineInfo
.Line
= std::strtoul(LineStr
.c_str(), &End
, 10);
440 LineInfo
.Column
= std::strtoul(ColStr
.c_str(), &End
, 10);
442 CoveragePoint
*CoveragePoint
= &Points
.find(PointId
)->second
;
443 CoveragePoint
->Locs
.push_back(LineInfo
);
448 errs() << "Ignoring unknown key: " << Key
<< "\n";
452 for (auto &KV
: Points
) {
453 Coverage
->Points
.push_back(KV
.second
);
459 // ---------- MAIN FUNCTIONALITY ----------
461 std::string
stripPathPrefix(std::string Path
) {
462 if (ClStripPathPrefix
.empty())
464 size_t Pos
= Path
.find(ClStripPathPrefix
);
465 if (Pos
== std::string::npos
)
467 return Path
.substr(Pos
+ ClStripPathPrefix
.size());
470 static std::unique_ptr
<symbolize::LLVMSymbolizer
> createSymbolizer() {
471 symbolize::LLVMSymbolizer::Options SymbolizerOptions
;
472 SymbolizerOptions
.Demangle
= ClDemangle
;
473 SymbolizerOptions
.UseSymbolTable
= true;
474 return std::unique_ptr
<symbolize::LLVMSymbolizer
>(
475 new symbolize::LLVMSymbolizer(SymbolizerOptions
));
478 static std::string
normalizeFilename(const std::string
&FileName
) {
479 SmallString
<256> S(FileName
);
480 sys::path::remove_dots(S
, /* remove_dot_dot */ true);
481 return stripPathPrefix(sys::path::convert_to_slash(std::string(S
)));
487 : DefaultIgnorelist(createDefaultIgnorelist()),
488 UserIgnorelist(createUserIgnorelist()) {}
490 bool isIgnorelisted(const DILineInfo
&I
) {
491 if (DefaultIgnorelist
&&
492 DefaultIgnorelist
->inSection("sancov", "fun", I
.FunctionName
))
494 if (DefaultIgnorelist
&&
495 DefaultIgnorelist
->inSection("sancov", "src", I
.FileName
))
497 if (UserIgnorelist
&&
498 UserIgnorelist
->inSection("sancov", "fun", I
.FunctionName
))
500 if (UserIgnorelist
&&
501 UserIgnorelist
->inSection("sancov", "src", I
.FileName
))
507 static std::unique_ptr
<SpecialCaseList
> createDefaultIgnorelist() {
508 if (!ClUseDefaultIgnorelist
)
509 return std::unique_ptr
<SpecialCaseList
>();
510 std::unique_ptr
<MemoryBuffer
> MB
=
511 MemoryBuffer::getMemBuffer(DefaultIgnorelistStr
);
513 auto Ignorelist
= SpecialCaseList::create(MB
.get(), Error
);
514 failIfNotEmpty(Error
);
518 static std::unique_ptr
<SpecialCaseList
> createUserIgnorelist() {
519 if (ClIgnorelist
.empty())
520 return std::unique_ptr
<SpecialCaseList
>();
521 return SpecialCaseList::createOrDie({{ClIgnorelist
}},
522 *vfs::getRealFileSystem());
524 std::unique_ptr
<SpecialCaseList
> DefaultIgnorelist
;
525 std::unique_ptr
<SpecialCaseList
> UserIgnorelist
;
528 static std::vector
<CoveragePoint
>
529 getCoveragePoints(const std::string
&ObjectFile
,
530 const std::set
<uint64_t> &Addrs
,
531 const std::set
<uint64_t> &CoveredAddrs
) {
532 std::vector
<CoveragePoint
> Result
;
533 auto Symbolizer(createSymbolizer());
536 std::set
<std::string
> CoveredFiles
;
537 if (ClSkipDeadFiles
) {
538 for (auto Addr
: CoveredAddrs
) {
// TODO: it would be necessary to set proper section index here.
540 // object::SectionedAddress::UndefSection works for only absolute
542 object::SectionedAddress ModuleAddress
= {
543 Addr
, object::SectionedAddress::UndefSection
};
545 auto LineInfo
= Symbolizer
->symbolizeCode(ObjectFile
, ModuleAddress
);
546 failIfError(LineInfo
);
547 CoveredFiles
.insert(LineInfo
->FileName
);
549 Symbolizer
->symbolizeInlinedCode(ObjectFile
, ModuleAddress
);
550 failIfError(InliningInfo
);
551 for (uint32_t I
= 0; I
< InliningInfo
->getNumberOfFrames(); ++I
) {
552 auto FrameInfo
= InliningInfo
->getFrame(I
);
553 CoveredFiles
.insert(FrameInfo
.FileName
);
558 for (auto Addr
: Addrs
) {
559 std::set
<DILineInfo
> Infos
; // deduplicate debug info.
// TODO: it would be necessary to set proper section index here.
562 // object::SectionedAddress::UndefSection works for only absolute addresses.
563 object::SectionedAddress ModuleAddress
= {
564 Addr
, object::SectionedAddress::UndefSection
};
566 auto LineInfo
= Symbolizer
->symbolizeCode(ObjectFile
, ModuleAddress
);
567 failIfError(LineInfo
);
568 if (ClSkipDeadFiles
&&
569 CoveredFiles
.find(LineInfo
->FileName
) == CoveredFiles
.end())
571 LineInfo
->FileName
= normalizeFilename(LineInfo
->FileName
);
572 if (Ig
.isIgnorelisted(*LineInfo
))
575 auto Id
= utohexstr(Addr
, true);
576 auto Point
= CoveragePoint(Id
);
577 Infos
.insert(*LineInfo
);
578 Point
.Locs
.push_back(*LineInfo
);
581 Symbolizer
->symbolizeInlinedCode(ObjectFile
, ModuleAddress
);
582 failIfError(InliningInfo
);
583 for (uint32_t I
= 0; I
< InliningInfo
->getNumberOfFrames(); ++I
) {
584 auto FrameInfo
= InliningInfo
->getFrame(I
);
585 if (ClSkipDeadFiles
&&
586 CoveredFiles
.find(FrameInfo
.FileName
) == CoveredFiles
.end())
588 FrameInfo
.FileName
= normalizeFilename(FrameInfo
.FileName
);
589 if (Ig
.isIgnorelisted(FrameInfo
))
591 if (Infos
.find(FrameInfo
) == Infos
.end()) {
592 Infos
.insert(FrameInfo
);
593 Point
.Locs
.push_back(FrameInfo
);
597 Result
.push_back(Point
);
603 static bool isCoveragePointSymbol(StringRef Name
) {
604 return Name
== "__sanitizer_cov" || Name
== "__sanitizer_cov_with_check" ||
605 Name
== "__sanitizer_cov_trace_func_enter" ||
606 Name
== "__sanitizer_cov_trace_pc_guard" ||
607 // Mac has '___' prefix
608 Name
== "___sanitizer_cov" || Name
== "___sanitizer_cov_with_check" ||
609 Name
== "___sanitizer_cov_trace_func_enter" ||
610 Name
== "___sanitizer_cov_trace_pc_guard";
613 // Locate __sanitizer_cov* function addresses inside the stubs table on MachO.
614 static void findMachOIndirectCovFunctions(const object::MachOObjectFile
&O
,
615 std::set
<uint64_t> *Result
) {
616 MachO::dysymtab_command Dysymtab
= O
.getDysymtabLoadCommand();
617 MachO::symtab_command Symtab
= O
.getSymtabLoadCommand();
619 for (const auto &Load
: O
.load_commands()) {
620 if (Load
.C
.cmd
== MachO::LC_SEGMENT_64
) {
621 MachO::segment_command_64 Seg
= O
.getSegment64LoadCommand(Load
);
622 for (unsigned J
= 0; J
< Seg
.nsects
; ++J
) {
623 MachO::section_64 Sec
= O
.getSection64(Load
, J
);
625 uint32_t SectionType
= Sec
.flags
& MachO::SECTION_TYPE
;
626 if (SectionType
== MachO::S_SYMBOL_STUBS
) {
627 uint32_t Stride
= Sec
.reserved2
;
628 uint32_t Cnt
= Sec
.size
/ Stride
;
629 uint32_t N
= Sec
.reserved1
;
630 for (uint32_t J
= 0; J
< Cnt
&& N
+ J
< Dysymtab
.nindirectsyms
; J
++) {
631 uint32_t IndirectSymbol
=
632 O
.getIndirectSymbolTableEntry(Dysymtab
, N
+ J
);
633 uint64_t Addr
= Sec
.addr
+ J
* Stride
;
634 if (IndirectSymbol
< Symtab
.nsyms
) {
635 object::SymbolRef Symbol
= *(O
.getSymbolByIndex(IndirectSymbol
));
636 Expected
<StringRef
> Name
= Symbol
.getName();
638 if (isCoveragePointSymbol(Name
.get())) {
639 Result
->insert(Addr
);
646 if (Load
.C
.cmd
== MachO::LC_SEGMENT
) {
647 errs() << "ERROR: 32 bit MachO binaries not supported\n";
652 // Locate __sanitizer_cov* function addresses that are used for coverage
654 static std::set
<uint64_t>
655 findSanitizerCovFunctions(const object::ObjectFile
&O
) {
656 std::set
<uint64_t> Result
;
658 for (const object::SymbolRef
&Symbol
: O
.symbols()) {
659 Expected
<uint64_t> AddressOrErr
= Symbol
.getAddress();
660 failIfError(AddressOrErr
);
661 uint64_t Address
= AddressOrErr
.get();
663 Expected
<StringRef
> NameOrErr
= Symbol
.getName();
664 failIfError(NameOrErr
);
665 StringRef Name
= NameOrErr
.get();
667 Expected
<uint32_t> FlagsOrErr
= Symbol
.getFlags();
668 // TODO: Test this error.
669 failIfError(FlagsOrErr
);
670 uint32_t Flags
= FlagsOrErr
.get();
672 if (!(Flags
& object::BasicSymbolRef::SF_Undefined
) &&
673 isCoveragePointSymbol(Name
)) {
674 Result
.insert(Address
);
678 if (const auto *CO
= dyn_cast
<object::COFFObjectFile
>(&O
)) {
679 for (const object::ExportDirectoryEntryRef
&Export
:
680 CO
->export_directories()) {
682 failIfError(Export
.getExportRVA(RVA
));
685 failIfError(Export
.getSymbolName(Name
));
687 if (isCoveragePointSymbol(Name
))
688 Result
.insert(CO
->getImageBase() + RVA
);
692 if (const auto *MO
= dyn_cast
<object::MachOObjectFile
>(&O
)) {
693 findMachOIndirectCovFunctions(*MO
, &Result
);
700 // compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h:GetPreviousInstructionPc
701 // GetPreviousInstructionPc.
702 static uint64_t getPreviousInstructionPc(uint64_t PC
,
704 if (TheTriple
.isARM())
705 return (PC
- 3) & (~1);
706 if (TheTriple
.isMIPS() || TheTriple
.isSPARC())
708 if (TheTriple
.isRISCV())
710 if (TheTriple
.isX86() || TheTriple
.isSystemZ())
715 // Locate addresses of all coverage points in a file. Coverage point
716 // is defined as the 'address of instruction following __sanitizer_cov
718 static void getObjectCoveragePoints(const object::ObjectFile
&O
,
719 std::set
<uint64_t> *Addrs
) {
720 Triple
TheTriple("unknown-unknown-unknown");
721 TheTriple
.setArch(Triple::ArchType(O
.getArch()));
722 auto TripleName
= TheTriple
.getTriple();
725 const Target
*TheTarget
= TargetRegistry::lookupTarget(TripleName
, Error
);
726 failIfNotEmpty(Error
);
728 std::unique_ptr
<const MCSubtargetInfo
> STI(
729 TheTarget
->createMCSubtargetInfo(TripleName
, "", ""));
730 failIfEmpty(STI
, "no subtarget info for target " + TripleName
);
732 std::unique_ptr
<const MCRegisterInfo
> MRI(
733 TheTarget
->createMCRegInfo(TripleName
));
734 failIfEmpty(MRI
, "no register info for target " + TripleName
);
736 MCTargetOptions MCOptions
;
737 std::unique_ptr
<const MCAsmInfo
> AsmInfo(
738 TheTarget
->createMCAsmInfo(*MRI
, TripleName
, MCOptions
));
739 failIfEmpty(AsmInfo
, "no asm info for target " + TripleName
);
741 MCContext
Ctx(TheTriple
, AsmInfo
.get(), MRI
.get(), STI
.get());
742 std::unique_ptr
<MCDisassembler
> DisAsm(
743 TheTarget
->createMCDisassembler(*STI
, Ctx
));
744 failIfEmpty(DisAsm
, "no disassembler info for target " + TripleName
);
746 std::unique_ptr
<const MCInstrInfo
> MII(TheTarget
->createMCInstrInfo());
747 failIfEmpty(MII
, "no instruction info for target " + TripleName
);
749 std::unique_ptr
<const MCInstrAnalysis
> MIA(
750 TheTarget
->createMCInstrAnalysis(MII
.get()));
751 failIfEmpty(MIA
, "no instruction analysis info for target " + TripleName
);
753 auto SanCovAddrs
= findSanitizerCovFunctions(O
);
754 if (SanCovAddrs
.empty())
755 fail("__sanitizer_cov* functions not found");
757 for (object::SectionRef Section
: O
.sections()) {
758 if (Section
.isVirtual() || !Section
.isText()) // llvm-objdump does the same.
760 uint64_t SectionAddr
= Section
.getAddress();
761 uint64_t SectSize
= Section
.getSize();
765 Expected
<StringRef
> BytesStr
= Section
.getContents();
766 failIfError(BytesStr
);
767 ArrayRef
<uint8_t> Bytes
= arrayRefFromStringRef(*BytesStr
);
769 for (uint64_t Index
= 0, Size
= 0; Index
< Section
.getSize();
772 ArrayRef
<uint8_t> ThisBytes
= Bytes
.slice(Index
);
773 uint64_t ThisAddr
= SectionAddr
+ Index
;
774 if (!DisAsm
->getInstruction(Inst
, Size
, ThisBytes
, ThisAddr
, nulls())) {
776 Size
= std::min
<uint64_t>(
778 DisAsm
->suggestBytesToSkip(ThisBytes
, ThisAddr
));
781 uint64_t Addr
= Index
+ SectionAddr
;
782 // Sanitizer coverage uses the address of the next instruction - 1.
783 uint64_t CovPoint
= getPreviousInstructionPc(Addr
+ Size
, TheTriple
);
785 if (MIA
->isCall(Inst
) &&
786 MIA
->evaluateBranch(Inst
, SectionAddr
+ Index
, Size
, Target
) &&
787 SanCovAddrs
.find(Target
) != SanCovAddrs
.end())
788 Addrs
->insert(CovPoint
);
794 visitObjectFiles(const object::Archive
&A
,
795 function_ref
<void(const object::ObjectFile
&)> Fn
) {
796 Error Err
= Error::success();
797 for (auto &C
: A
.children(Err
)) {
798 Expected
<std::unique_ptr
<object::Binary
>> ChildOrErr
= C
.getAsBinary();
799 failIfError(ChildOrErr
);
800 if (auto *O
= dyn_cast
<object::ObjectFile
>(&*ChildOrErr
.get()))
803 failIfError(object::object_error::invalid_file_type
);
805 failIfError(std::move(Err
));
809 visitObjectFiles(const std::string
&FileName
,
810 function_ref
<void(const object::ObjectFile
&)> Fn
) {
811 Expected
<object::OwningBinary
<object::Binary
>> BinaryOrErr
=
812 object::createBinary(FileName
);
814 failIfError(BinaryOrErr
);
816 object::Binary
&Binary
= *BinaryOrErr
.get().getBinary();
817 if (object::Archive
*A
= dyn_cast
<object::Archive
>(&Binary
))
818 visitObjectFiles(*A
, Fn
);
819 else if (object::ObjectFile
*O
= dyn_cast
<object::ObjectFile
>(&Binary
))
822 failIfError(object::object_error::invalid_file_type
);
825 static std::set
<uint64_t>
826 findSanitizerCovFunctions(const std::string
&FileName
) {
827 std::set
<uint64_t> Result
;
828 visitObjectFiles(FileName
, [&](const object::ObjectFile
&O
) {
829 auto Addrs
= findSanitizerCovFunctions(O
);
830 Result
.insert(Addrs
.begin(), Addrs
.end());
835 // Locate addresses of all coverage points in a file. Coverage point
836 // is defined as the 'address of instruction following __sanitizer_cov
838 static std::set
<uint64_t> findCoveragePointAddrs(const std::string
&FileName
) {
839 std::set
<uint64_t> Result
;
840 visitObjectFiles(FileName
, [&](const object::ObjectFile
&O
) {
841 getObjectCoveragePoints(O
, &Result
);
846 static void printCovPoints(const std::string
&ObjFile
, raw_ostream
&OS
) {
847 for (uint64_t Addr
: findCoveragePointAddrs(ObjFile
)) {
854 static ErrorOr
<bool> isCoverageFile(const std::string
&FileName
) {
855 auto ShortFileName
= llvm::sys::path::filename(FileName
);
856 if (!SancovFileRegex
.match(ShortFileName
))
859 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> BufOrErr
=
860 MemoryBuffer::getFile(FileName
);
862 errs() << "Warning: " << BufOrErr
.getError().message() << "("
863 << BufOrErr
.getError().value()
864 << "), filename: " << llvm::sys::path::filename(FileName
) << "\n";
865 return BufOrErr
.getError();
867 std::unique_ptr
<MemoryBuffer
> Buf
= std::move(BufOrErr
.get());
868 if (Buf
->getBufferSize() < 8) {
871 const FileHeader
*Header
=
872 reinterpret_cast<const FileHeader
*>(Buf
->getBufferStart());
873 return Header
->Magic
== BinCoverageMagic
;
876 static bool isSymbolizedCoverageFile(const std::string
&FileName
) {
877 auto ShortFileName
= llvm::sys::path::filename(FileName
);
878 return SymcovFileRegex
.match(ShortFileName
);
881 static std::unique_ptr
<SymbolizedCoverage
>
882 symbolize(const RawCoverage
&Data
, const std::string ObjectFile
) {
883 auto Coverage
= std::make_unique
<SymbolizedCoverage
>();
885 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> BufOrErr
=
886 MemoryBuffer::getFile(ObjectFile
);
887 failIfError(BufOrErr
);
889 Hasher
.update((*BufOrErr
)->getBuffer());
890 Coverage
->BinaryHash
= toHex(Hasher
.final());
893 auto Symbolizer(createSymbolizer());
895 for (uint64_t Addr
: *Data
.Addrs
) {
// TODO: it would be necessary to set proper section index here.
897 // object::SectionedAddress::UndefSection works for only absolute addresses.
898 auto LineInfo
= Symbolizer
->symbolizeCode(
899 ObjectFile
, {Addr
, object::SectionedAddress::UndefSection
});
900 failIfError(LineInfo
);
901 if (Ig
.isIgnorelisted(*LineInfo
))
904 Coverage
->CoveredIds
.insert(utohexstr(Addr
, true));
907 std::set
<uint64_t> AllAddrs
= findCoveragePointAddrs(ObjectFile
);
908 if (!std::includes(AllAddrs
.begin(), AllAddrs
.end(), Data
.Addrs
->begin(),
909 Data
.Addrs
->end())) {
910 fail("Coverage points in binary and .sancov file do not match.");
912 Coverage
->Points
= getCoveragePoints(ObjectFile
, AllAddrs
, *Data
.Addrs
);
917 bool operator<(const FileFn
&RHS
) const {
918 return std::tie(FileName
, FunctionName
) <
919 std::tie(RHS
.FileName
, RHS
.FunctionName
);
922 std::string FileName
;
923 std::string FunctionName
;
926 static std::set
<FileFn
>
927 computeFunctions(const std::vector
<CoveragePoint
> &Points
) {
928 std::set
<FileFn
> Fns
;
929 for (const auto &Point
: Points
) {
930 for (const auto &Loc
: Point
.Locs
) {
931 Fns
.insert(FileFn
{Loc
.FileName
, Loc
.FunctionName
});
937 static std::set
<FileFn
>
938 computeNotCoveredFunctions(const SymbolizedCoverage
&Coverage
) {
939 auto Fns
= computeFunctions(Coverage
.Points
);
941 for (const auto &Point
: Coverage
.Points
) {
942 if (Coverage
.CoveredIds
.find(Point
.Id
) == Coverage
.CoveredIds
.end())
945 for (const auto &Loc
: Point
.Locs
) {
946 Fns
.erase(FileFn
{Loc
.FileName
, Loc
.FunctionName
});
953 static std::set
<FileFn
>
954 computeCoveredFunctions(const SymbolizedCoverage
&Coverage
) {
955 auto AllFns
= computeFunctions(Coverage
.Points
);
956 std::set
<FileFn
> Result
;
958 for (const auto &Point
: Coverage
.Points
) {
959 if (Coverage
.CoveredIds
.find(Point
.Id
) == Coverage
.CoveredIds
.end())
962 for (const auto &Loc
: Point
.Locs
) {
963 Result
.insert(FileFn
{Loc
.FileName
, Loc
.FunctionName
});
970 typedef std::map
<FileFn
, std::pair
<uint32_t, uint32_t>> FunctionLocs
;
971 // finds first location in a file for each function.
972 static FunctionLocs
resolveFunctions(const SymbolizedCoverage
&Coverage
,
973 const std::set
<FileFn
> &Fns
) {
975 for (const auto &Point
: Coverage
.Points
) {
976 for (const auto &Loc
: Point
.Locs
) {
977 FileFn Fn
= FileFn
{Loc
.FileName
, Loc
.FunctionName
};
978 if (Fns
.find(Fn
) == Fns
.end())
981 auto P
= std::make_pair(Loc
.Line
, Loc
.Column
);
982 auto I
= Result
.find(Fn
);
983 if (I
== Result
.end() || I
->second
> P
) {
991 static void printFunctionLocs(const FunctionLocs
&FnLocs
, raw_ostream
&OS
) {
992 for (const auto &P
: FnLocs
) {
993 OS
<< stripPathPrefix(P
.first
.FileName
) << ":" << P
.second
.first
<< " "
994 << P
.first
.FunctionName
<< "\n";
997 CoverageStats
computeStats(const SymbolizedCoverage
&Coverage
) {
998 CoverageStats Stats
= {Coverage
.Points
.size(), Coverage
.CoveredIds
.size(),
999 computeFunctions(Coverage
.Points
).size(),
1000 computeCoveredFunctions(Coverage
).size()};
1004 // Print list of covered functions.
1005 // Line format: <file_name>:<line> <function_name>
1006 static void printCoveredFunctions(const SymbolizedCoverage
&CovData
,
1008 auto CoveredFns
= computeCoveredFunctions(CovData
);
1009 printFunctionLocs(resolveFunctions(CovData
, CoveredFns
), OS
);
1012 // Print list of not covered functions.
1013 // Line format: <file_name>:<line> <function_name>
1014 static void printNotCoveredFunctions(const SymbolizedCoverage
&CovData
,
1016 auto NotCoveredFns
= computeNotCoveredFunctions(CovData
);
1017 printFunctionLocs(resolveFunctions(CovData
, NotCoveredFns
), OS
);
1020 // Read list of files and merges their coverage info.
1021 static void readAndPrintRawCoverage(const std::vector
<std::string
> &FileNames
,
1023 std::vector
<std::unique_ptr
<RawCoverage
>> Covs
;
1024 for (const auto &FileName
: FileNames
) {
1025 auto Cov
= RawCoverage::read(FileName
);
// Combines several SymbolizedCoverage objects into one freshly allocated
// SymbolizedCoverage:
//  - every entry of CoveredIds is inserted into the result as Prefix + Id;
//  - every coverage point is copied and its Id rewritten to Prefix + Id;
//  - BinaryHash is carried over only when exactly one input is given.
// Empty input is special-cased before anything is allocated.
// NOTE(review): Prefix is defined outside the lines visible here; presumably
// it is derived from the input index I so that ids coming from different
// files cannot collide - confirm.
1032 static std::unique_ptr
<SymbolizedCoverage
>
1033 merge(const std::vector
<std::unique_ptr
<SymbolizedCoverage
>> &Coverages
) {
1034 if (Coverages
.empty())
1037 auto Result
= std::make_unique
<SymbolizedCoverage
>();
// Index-based loop: the position I of each input is available in the body.
1039 for (size_t I
= 0; I
< Coverages
.size(); ++I
) {
1040 const SymbolizedCoverage
&Coverage
= *Coverages
[I
];
1042 if (Coverages
.size() > 1) {
1043 // prefix is not needed when there's only one file.
// Covered ids are re-keyed with the prefix before insertion.
1047 for (const auto &Id
: Coverage
.CoveredIds
) {
1048 Result
->CoveredIds
.insert(Prefix
+ Id
);
// Points are copied first and then given the same prefixed id, so ids in
// Result->Points stay consistent with the ids in Result->CoveredIds.
1051 for (const auto &CovPoint
: Coverage
.Points
) {
1052 CoveragePoint
NewPoint(CovPoint
);
1053 NewPoint
.Id
= Prefix
+ CovPoint
.Id
;
1054 Result
->Points
.push_back(NewPoint
);
// With a single input the result still describes exactly one binary, so its
// hash is preserved; with several inputs BinaryHash keeps its default value.
1058 if (Coverages
.size() == 1) {
1059 Result
->BinaryHash
= Coverages
[0]->BinaryHash
;
// Turns the list of command-line file names into one merged
// SymbolizedCoverage.
//
// Steps visible in this function:
//  1. Classify every name: symbolized coverage files are read directly into
//     Coverages; names for which isCoverageFile() yields true go to CovFiles;
//     the remaining ones are registered in ObjFiles under their short
//     (path-less) file name.
//  2. Match each raw coverage file's short name against SancovFileRegex and
//     use capture group 1 to look up the corresponding object file.
//  3. Warn about object files that have no coverage file.
//  4. Read and symbolize every raw coverage file against its object file.
//  5. Return merge(Coverages).
//
// Problems are reported through fail() / failIfError(); their exact behavior
// is defined elsewhere in the file.
1065 static std::unique_ptr
<SymbolizedCoverage
>
1066 readSymbolizeAndMergeCmdArguments(std::vector
<std::string
> FileNames
) {
1067 std::vector
<std::unique_ptr
<SymbolizedCoverage
>> Coverages
;
1070 // Short name => file name.
1071 std::map
<std::string
, std::string
> ObjFiles
;
// NOTE(review): FirstObjFile is assigned further down but never read in the
// lines visible here - check whether it is still needed.
1072 std::string FirstObjFile
;
1073 std::set
<std::string
> CovFiles
;
1075 // Partition input values into coverage/object files.
1076 for (const auto &FileName
: FileNames
) {
1077 if (isSymbolizedCoverageFile(FileName
)) {
1078 Coverages
.push_back(SymbolizedCoverage::read(FileName
));
1081 auto ErrorOrIsCoverage
= isCoverageFile(FileName
);
1082 if (!ErrorOrIsCoverage
)
1084 if (ErrorOrIsCoverage
.get()) {
1085 CovFiles
.insert(FileName
);
// Object files are keyed by short name, so two inputs with the same short
// name cannot both be registered; that case is reported via fail().
1087 auto ShortFileName
= llvm::sys::path::filename(FileName
);
1088 if (ObjFiles
.find(std::string(ShortFileName
)) != ObjFiles
.end()) {
1089 fail("Duplicate binary file with a short name: " + ShortFileName
);
1092 ObjFiles
[std::string(ShortFileName
)] = FileName
;
1093 if (FirstObjFile
.empty())
1094 FirstObjFile
= FileName
;
// Receives the regex match results for each coverage file name.
1098 SmallVector
<StringRef
, 2> Components
;
1100 // Object file => list of corresponding coverage file names.
1101 std::map
<std::string
, std::vector
<std::string
>> CoverageByObjFile
;
1102 for (const auto &FileName
: CovFiles
) {
1103 auto ShortFileName
= llvm::sys::path::filename(FileName
);
1104 auto Ok
= SancovFileRegex
.match(ShortFileName
, &Components
);
1106 fail("Can't match coverage file name against "
1107 "<module_name>.<pid>.sancov pattern: " +
// Components[1] is the first capture group of SancovFileRegex.
// NOTE(review): judging by the message above this is presumably the
// <module_name> part - confirm against the regex definition.
1111 auto Iter
= ObjFiles
.find(std::string(Components
[1]));
1112 if (Iter
== ObjFiles
.end()) {
1113 fail("Object file for coverage not found: " + FileName
);
1116 CoverageByObjFile
[Iter
->second
].push_back(FileName
);
// Diagnostic only: an object file without any coverage file is reported on
// errs() as a warning.
1119 for (const auto &Pair
: ObjFiles
) {
// NOTE(review): `auto` without a reference copies the std::string here; a
// const reference would be enough for the lookup and the message.
1120 auto FileName
= Pair
.second
;
1121 if (CoverageByObjFile
.find(FileName
) == CoverageByObjFile
.end())
1122 errs() << "WARNING: No coverage file for " << FileName
<< "\n";
1125 // Read raw coverage and symbolize it.
// Pair.first is the object file path, Pair.second the list of its coverage
// files (see the CoverageByObjFile declaration above).
1126 for (const auto &Pair
: CoverageByObjFile
) {
1127 if (findSanitizerCovFunctions(Pair
.first
).empty()) {
1129 << "WARNING: Ignoring " << Pair
.first
1130 << " and its coverage because __sanitizer_cov* functions were not "
1135 for (const std::string
&CoverageFile
: Pair
.second
) {
1136 auto DataOrError
= RawCoverage::read(CoverageFile
);
1137 failIfError(DataOrError
);
1138 Coverages
.push_back(symbolize(*DataOrError
.get(), Pair
.first
));
1143 return merge(Coverages
);
// Tool entry point. Initializes the LLVM target-info, MC and disassembler
// registries, parses the command line, and then dispatches on Action:
//  - PrintAction and PrintCovPointsAction are handled up front, before any
//    symbolized coverage is built;
//  - all other actions operate on the merged SymbolizedCoverage produced by
//    readSymbolizeAndMergeCmdArguments(ClInputFiles).
1148 int main(int Argc
, char **Argv
) {
1149 llvm::InitLLVM
X(Argc
, Argv
);
// Only options belonging to this tool's category (Cat) stay visible in help.
1150 cl::HideUnrelatedOptions(Cat
);
1152 llvm::InitializeAllTargetInfos();
1153 llvm::InitializeAllTargetMCs();
1154 llvm::InitializeAllDisassemblers();
1156 cl::ParseCommandLineOptions(Argc
, Argv
,
1157 "Sanitizer Coverage Processing Tool (sancov)\n\n"
1158 " This tool can extract various coverage-related information from: \n"
1159 " coverage-instrumented binary files, raw .sancov files and their "
1160 "symbolized .symcov version.\n"
1161 " Depending on chosen action the tool expects different input files:\n"
1162 " -print-coverage-pcs - coverage-instrumented binary files\n"
1163 " -print-coverage - .sancov files\n"
1164 " <other actions> - .sancov files & corresponding binary "
1165 "files, .symcov files\n"
1168 // -print doesn't need object files.
1169 if (Action
== PrintAction
) {
1170 readAndPrintRawCoverage(ClInputFiles
, outs());
1172 } else if (Action
== PrintCovPointsAction
) {
1173 // -print-coverage-points doesn't need coverage files.
1174 for (const std::string
&ObjFile
: ClInputFiles
) {
1175 printCovPoints(ObjFile
, outs());
// Every remaining action needs symbolized coverage; a null result from the
// reader is reported through failIf().
1180 auto Coverage
= readSymbolizeAndMergeCmdArguments(ClInputFiles
);
1181 failIf(!Coverage
, "No valid coverage files given.");
1184 case CoveredFunctionsAction
: {
1185 printCoveredFunctions(*Coverage
, outs());
1188 case NotCoveredFunctionsAction
: {
1189 printNotCoveredFunctions(*Coverage
, outs());
// Statistics are streamed to outs() via operator<< for CoverageStats.
1193 outs() << computeStats(*Coverage
);
1197 case SymbolizeAction
: { // merge & symbolize are synonyms.
1198 json::OStream
W(outs(), 2);
// The HTML report action only prints a removal notice on errs().
1202 case HtmlReportAction
:
1203 errs() << "-html-report option is removed: "
1204 "use -symbolize & coverage-report-server.py instead\n";
// PrintCovPointsAction was already handled by the if/else chain above, so
// reaching its case label here would indicate a logic error.
1207 case PrintCovPointsAction
:
1208 llvm_unreachable("unsupported action");