2 # Merge or print the coverage data collected by asan's coverage.
3 # Input files are sequences of 4-byte integers.
4 # We need to merge these integers into a set and then
5 # either print them (as hex) or dump them into another file.
19 "Usage: \n" + " " + prog_name
+ " merge FILE [FILE...] > OUTPUT\n"
20 " " + prog_name
+ " print FILE [FILE...]\n"
21 " " + prog_name
+ " unpack FILE [FILE...]\n"
22 " " + prog_name
+ " rawunpack FILE [FILE ...]\n"
23 " " + prog_name
+ " missing BINARY < LIST_OF_PCS\n"
30 if bits
!= 32 and bits
!= 64:
31 raise Exception("Wrong bitness: %d" % bits
)
def TypeCodeForBits(bits):
    """Return the one-character type code for a PC of the given width.

    Args:
        bits: PC width in bits.

    Returns:
        "L" when bits is 64, otherwise "I".
    """
    # NOTE(review): the source listing shows a gap between the def line and
    # the return; if a bitness validation call was lost there, restore it
    # from the upstream file.
    # NOTE(review): presumably an `array` module type code; not referenced
    # by the visible code -- confirm against callers.
    return "L" if bits == 64 else "I"
def TypeCodeForStruct(bits):
    """Return the `struct` format character for a PC of the given width.

    Callers in this file repeat the returned character once per PC to build
    the format string passed to struct.pack / struct.unpack_from.

    Args:
        bits: PC width in bits.

    Returns:
        "Q" when bits is 64, otherwise "I".
    """
    # NOTE(review): the source listing shows a gap between the def line and
    # the return; if a bitness validation call was lost there, restore it
    # from the upstream file.
    return "Q" if bits == 64 else "I"
# Magic header of a coverage file, handled as two 32-bit words.
# ReadMagicAndReturnBitness() requires one word to equal kMagicFirstHalf and
# compares the other against the two "second half" values; MagicForBits()
# picks the 64-bit second half when bits == 64 and the 32-bit one otherwise.
kMagic32SecondHalf = 0xFFFFFF32  # second half used for 32-bit PCs
kMagic64SecondHalf = 0xFFFFFF64  # second half used for 64-bit PCs
kMagicFirstHalf = 0xC0BFFFFF  # first half, shared by both bitnesses
49 def MagicForBits(bits
):
51 if sys
.byteorder
== "little":
53 kMagic64SecondHalf
if bits
== 64 else kMagic32SecondHalf
,
59 kMagic64SecondHalf
if bits
== 64 else kMagic32SecondHalf
,
63 def ReadMagicAndReturnBitness(f
, path
):
64 magic_bytes
= f
.read(8)
65 magic_words
= struct
.unpack("II", magic_bytes
)
67 idx
= 1 if sys
.byteorder
== "little" else 0
68 if magic_words
[idx
] == kMagicFirstHalf
:
69 if magic_words
[1 - idx
] == kMagic64SecondHalf
:
71 elif magic_words
[1 - idx
] == kMagic32SecondHalf
:
74 raise Exception("Bad magic word in %s" % path
)
78 def ReadOneFile(path
):
79 with
open(path
, mode
="rb") as f
:
84 raise Exception("File %s is short (< 8 bytes)" % path
)
85 bits
= ReadMagicAndReturnBitness(f
, path
)
88 s
= struct
.unpack_from(TypeCodeForStruct(bits
) * (w
), f
.read(size
))
89 sys
.stderr
.write("%s: read %d %d-bit PCs from %s\n" % (prog_name
, w
, bits
, path
))
96 s
= s
.union(set(ReadOneFile(f
)))
98 "%s: %d files merged; %d PCs total\n" % (prog_name
, len(files
), len(s
))
103 def PrintFiles(files
):
106 else: # If there is just one file, print the PCs in order.
107 s
= ReadOneFile(files
[0])
108 sys
.stderr
.write("%s: 1 file merged; %d PCs total\n" % (prog_name
, len(s
)))
113 def MergeAndPrint(files
):
114 if sys
.stdout
.isatty():
118 if max(s
) > 0xFFFFFFFF:
120 stdout_buf
= getattr(sys
.stdout
, "buffer", sys
.stdout
)
121 array
.array("I", MagicForBits(bits
)).tofile(stdout_buf
)
122 a
= struct
.pack(TypeCodeForStruct(bits
) * len(s
), *s
)
126 def UnpackOneFile(path
):
127 with
open(path
, mode
="rb") as f
:
128 sys
.stderr
.write("%s: unpacking %s\n" % (prog_name
, path
))
135 pid
, module_length
, blob_size
= struct
.unpack("iII", header
)
136 module
= f
.read(module_length
).decode("utf-8")
137 blob
= f
.read(blob_size
)
138 assert len(module
) == module_length
139 assert len(blob
) == blob_size
140 extracted_file
= "%s.%d.sancov" % (module
, pid
)
141 sys
.stderr
.write("%s: extracting %s\n" % (prog_name
, extracted_file
))
142 # The packed file may contain multiple blobs for the same pid/module
143 # pair. Append to the end of the file instead of overwriting.
144 with
open(extracted_file
, "ab") as f2
:
147 raise Exception("Error reading file %s" % path
)
155 def UnpackOneRawFile(path
, map_path
):
157 with
open(map_path
, mode
="rt") as f_map
:
158 sys
.stderr
.write("%s: reading map %s\n" % (prog_name
, map_path
))
159 bits
= int(f_map
.readline())
160 if bits
!= 32 and bits
!= 64:
161 raise Exception("Wrong bits size in the map")
163 parts
= line
.rstrip().split()
172 mem_map
.sort(key
=lambda m
: m
[0])
173 mem_map_keys
= [m
[0] for m
in mem_map
]
175 with
open(path
, mode
="rb") as f
:
176 sys
.stderr
.write("%s: unpacking %s\n" % (prog_name
, path
))
181 pcs
= struct
.unpack_from(
182 TypeCodeForStruct(bits
) * (size
* 8 // bits
), f
.read(size
)
184 mem_map_pcs
= [[] for i
in range(0, len(mem_map
))]
189 map_idx
= bisect
.bisect(mem_map_keys
, pc
) - 1
190 (start
, end
, base
, module_path
) = mem_map
[map_idx
]
194 "warning: %s: pc %x outside of any known mapping\n"
198 mem_map_pcs
[map_idx
].append(pc
- base
)
200 for ((start
, end
, base
, module_path
), pc_list
) in zip(mem_map
, mem_map_pcs
):
201 if len(pc_list
) == 0:
203 assert path
.endswith(".sancov.raw")
204 dst_path
= module_path
+ "." + os
.path
.basename(path
)[:-4]
206 "%s: writing %d PCs to %s\n" % (prog_name
, len(pc_list
), dst_path
)
208 sorted_pc_list
= sorted(pc_list
)
209 pc_buffer
= struct
.pack(
210 TypeCodeForStruct(bits
) * len(pc_list
), *sorted_pc_list
212 with
open(dst_path
, "ab+") as f2
:
213 array
.array("I", MagicForBits(bits
)).tofile(f2
)
218 def RawUnpack(files
):
220 if not f
.endswith(".sancov.raw"):
221 raise Exception("Unexpected raw file name %s" % f
)
222 f_map
= f
[:-3] + "map"
223 UnpackOneRawFile(f
, f_map
)
226 def GetInstrumentedPCs(binary
):
227 # This looks scary, but all it does is extract all offsets where we call:
228 # - __sanitizer_cov() or __sanitizer_cov_with_check(),
229 # - with call or callq,
230 # - directly or via PLT.
232 r
"objdump --no-show-raw-insn -d %s | "
233 r
"grep '^\s\+[0-9a-f]\+:\s\+call\(q\|\)\s\+\(0x\|\)[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\|_trace_pc_guard\)\(@plt\|\)>' | "
234 r
"grep -o '^\s\+[0-9a-f]\+'" % binary
236 lines
= subprocess
.check_output(cmd
, stdin
=subprocess
.PIPE
, shell
=True).splitlines()
237 # The PCs we get from objdump are off by 4 bytes, as they point to the
238 # beginning of the callq instruction. Empirically this is true on x86 and
240 return set(int(line
.strip(), 16) + 4 for line
in lines
)
243 def PrintMissing(binary
):
244 if not os
.path
.isfile(binary
):
245 raise Exception("File not found: %s" % binary
)
246 instrumented
= GetInstrumentedPCs(binary
)
248 "%s: found %d instrumented PCs in %s\n" % (prog_name
, len(instrumented
), binary
)
250 covered
= set(int(line
, 16) for line
in sys
.stdin
)
251 sys
.stderr
.write("%s: read %d PCs from stdin\n" % (prog_name
, len(covered
)))
252 missing
= instrumented
- covered
253 sys
.stderr
.write("%s: %d PCs missing from coverage\n" % (prog_name
, len(missing
)))
254 if len(missing
) > len(instrumented
) - len(covered
):
256 "%s: WARNING: stdin contains PCs not found in binary\n" % prog_name
258 for pc
in sorted(missing
):
262 if __name__
== "__main__":
263 prog_name
= sys
.argv
[0]
264 if len(sys
.argv
) <= 2:
267 if sys
.argv
[1] == "missing":
268 if len(sys
.argv
) != 3:
270 PrintMissing(sys
.argv
[2])
274 for f
in sys
.argv
[2:]:
275 file_list
+= glob
.glob(f
)
279 if sys
.argv
[1] == "print":
280 PrintFiles(file_list
)
281 elif sys
.argv
[1] == "merge":
282 MergeAndPrint(file_list
)
283 elif sys
.argv
[1] == "unpack":
285 elif sys
.argv
[1] == "rawunpack":