2 # Merge or print the coverage data collected by asan's coverage.
3 # Input files are an 8-byte magic header followed by a sequence of 32-bit or 64-bit PCs.
4 # We need to merge these integers into a set and then
5 # either print them (as hex) or dump them into another file.
19 " " + prog_name
+ " merge FILE [FILE...] > OUTPUT\n" \
20 " " + prog_name
+ " print FILE [FILE...]\n" \
21 " " + prog_name
+ " unpack FILE [FILE...]\n" \
22 " " + prog_name
+ " rawunpack FILE [FILE ...]\n" \
23 " " + prog_name
+ " missing BINARY < LIST_OF_PCS\n" \
28 if bits
!= 32 and bits
!= 64:
29 raise Exception("Wrong bitness: %d" % bits
)
31 def TypeCodeForBits(bits
):
33 return 'L' if bits
== 64 else 'I'
35 def TypeCodeForStruct(bits
):
37 return 'Q' if bits
== 64 else 'I'
# The two 32-bit magic words that open every coverage file.  The "first half"
# is common to all files; the "second half" tells 32-bit files apart from
# 64-bit ones (see MagicForBits and ReadMagicAndReturnBitness).
kMagic32SecondHalf = 0xFFFFFF32
kMagic64SecondHalf = 0xFFFFFF64
kMagicFirstHalf = 0xC0BFFFFF
43 def MagicForBits(bits
):
45 if sys
.byteorder
== 'little':
46 return [kMagic64SecondHalf
if bits
== 64 else kMagic32SecondHalf
, kMagicFirstHalf
]
48 return [kMagicFirstHalf
, kMagic64SecondHalf
if bits
== 64 else kMagic32SecondHalf
]
50 def ReadMagicAndReturnBitness(f
, path
):
51 magic_bytes
= f
.read(8)
52 magic_words
= struct
.unpack('II', magic_bytes
);
54 idx
= 1 if sys
.byteorder
== 'little' else 0
55 if magic_words
[idx
] == kMagicFirstHalf
:
56 if magic_words
[1-idx
] == kMagic64SecondHalf
:
58 elif magic_words
[1-idx
] == kMagic32SecondHalf
:
61 raise Exception('Bad magic word in %s' % path
)
64 def ReadOneFile(path
):
65 with
open(path
, mode
="rb") as f
:
70 raise Exception('File %s is short (< 8 bytes)' % path
)
71 bits
= ReadMagicAndReturnBitness(f
, path
)
74 s
= struct
.unpack_from(TypeCodeForStruct(bits
) * (w
), f
.read(size
))
76 "%s: read %d %d-bit PCs from %s\n" % (prog_name
, w
, bits
, path
))
82 s
= s
.union(set(ReadOneFile(f
)))
84 "%s: %d files merged; %d PCs total\n" % (prog_name
, len(files
), len(s
))
88 def PrintFiles(files
):
91 else: # If there is just one file, print the PCs in order.
92 s
= ReadOneFile(files
[0])
93 sys
.stderr
.write("%s: 1 file merged; %d PCs total\n" % (prog_name
, len(s
)))
97 def MergeAndPrint(files
):
98 if sys
.stdout
.isatty():
102 if max(s
) > 0xFFFFFFFF:
104 stdout_buf
= getattr(sys
.stdout
, 'buffer', sys
.stdout
)
105 array
.array('I', MagicForBits(bits
)).tofile(stdout_buf
)
106 a
= struct
.pack(TypeCodeForStruct(bits
) * len(s
), *s
)
110 def UnpackOneFile(path
):
111 with
open(path
, mode
="rb") as f
:
112 sys
.stderr
.write("%s: unpacking %s\n" % (prog_name
, path
))
115 if not header
: return
118 pid
, module_length
, blob_size
= struct
.unpack('iII', header
)
119 module
= f
.read(module_length
).decode('utf-8')
120 blob
= f
.read(blob_size
)
121 assert(len(module
) == module_length
)
122 assert(len(blob
) == blob_size
)
123 extracted_file
= "%s.%d.sancov" % (module
, pid
)
124 sys
.stderr
.write("%s: extracting %s\n" % (prog_name
, extracted_file
))
125 # The packed file may contain multiple blobs for the same pid/module
126 # pair. Append to the end of the file instead of overwriting.
127 with
open(extracted_file
, 'ab') as f2
:
130 raise Exception('Error reading file %s' % path
)
137 def UnpackOneRawFile(path
, map_path
):
139 with
open(map_path
, mode
="rt") as f_map
:
140 sys
.stderr
.write("%s: reading map %s\n" % (prog_name
, map_path
))
141 bits
= int(f_map
.readline())
142 if bits
!= 32 and bits
!= 64:
143 raise Exception('Wrong bits size in the map')
145 parts
= line
.rstrip().split()
146 mem_map
.append((int(parts
[0], 16),
149 ' '.join(parts
[3:])))
150 mem_map
.sort(key
=lambda m
: m
[0])
151 mem_map_keys
= [m
[0] for m
in mem_map
]
153 with
open(path
, mode
="rb") as f
:
154 sys
.stderr
.write("%s: unpacking %s\n" % (prog_name
, path
))
159 pcs
= struct
.unpack_from(TypeCodeForStruct(bits
) * (size
* 8 // bits
), f
.read(size
))
160 mem_map_pcs
= [[] for i
in range(0, len(mem_map
))]
164 map_idx
= bisect
.bisect(mem_map_keys
, pc
) - 1
165 (start
, end
, base
, module_path
) = mem_map
[map_idx
]
168 sys
.stderr
.write("warning: %s: pc %x outside of any known mapping\n" % (prog_name
, pc
))
170 mem_map_pcs
[map_idx
].append(pc
- base
)
172 for ((start
, end
, base
, module_path
), pc_list
) in zip(mem_map
, mem_map_pcs
):
173 if len(pc_list
) == 0: continue
174 assert path
.endswith('.sancov.raw')
175 dst_path
= module_path
+ '.' + os
.path
.basename(path
)[:-4]
176 sys
.stderr
.write("%s: writing %d PCs to %s\n" % (prog_name
, len(pc_list
), dst_path
))
177 sorted_pc_list
= sorted(pc_list
)
178 pc_buffer
= struct
.pack(TypeCodeForStruct(bits
) * len(pc_list
), *sorted_pc_list
)
179 with
open(dst_path
, 'ab+') as f2
:
180 array
.array('I', MagicForBits(bits
)).tofile(f2
)
184 def RawUnpack(files
):
186 if not f
.endswith('.sancov.raw'):
187 raise Exception('Unexpected raw file name %s' % f
)
188 f_map
= f
[:-3] + 'map'
189 UnpackOneRawFile(f
, f_map
)
def GetInstrumentedPCs(binary):
    """Return the set of instrumented PCs found by disassembling `binary`.

    Runs an `objdump | grep | grep` shell pipeline over the binary and parses
    the hexadecimal offset of every matching call instruction.

    Args:
        binary: path of the binary to disassemble.  It is shell-quoted before
            being placed in the command line, so paths containing spaces or
            shell metacharacters are passed to objdump verbatim.

    Returns:
        A set of ints: each matched offset, parsed as hex, plus 4.

    Raises:
        subprocess.CalledProcessError: if the pipeline exits non-zero.  The
            pipeline's status is that of the final grep, which exits 1 when
            it selects no lines, so a binary without any matching call also
            raises.
    """
    # shlex.quote exists on Python 3; older interpreters expose the same
    # helper as pipes.quote.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # This looks scary, but all it does is extract all offsets where we call:
    # - __sanitizer_cov(), __sanitizer_cov_with_check() or
    #   __sanitizer_cov_trace_pc_guard(),
    # - with call or callq,
    # - directly or via PLT.
    cmd = (
        r"objdump --no-show-raw-insn -d %s | "
        r"grep '^\s\+[0-9a-f]\+:\s\+call\(q\|\)\s\+\(0x\|\)[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\|_trace_pc_guard\)\(@plt\|\)>' | "
        r"grep -o '^\s\+[0-9a-f]\+'"
    ) % quote(binary)
    # The pipeline gets its own stdin pipe; presumably this keeps it from
    # consuming the PC list that the caller reads from sys.stdin afterwards.
    output = subprocess.check_output(cmd, stdin=subprocess.PIPE, shell=True)
    # The PCs we get from objdump are off by 4 bytes, as they point to the
    # beginning of the call instruction (empirically true at least on x86),
    # so add 4 to each offset.
    return set(int(line.strip(), 16) + 4 for line in output.splitlines())
205 def PrintMissing(binary
):
206 if not os
.path
.isfile(binary
):
207 raise Exception('File not found: %s' % binary
)
208 instrumented
= GetInstrumentedPCs(binary
)
209 sys
.stderr
.write("%s: found %d instrumented PCs in %s\n" % (prog_name
,
212 covered
= set(int(line
, 16) for line
in sys
.stdin
)
213 sys
.stderr
.write("%s: read %d PCs from stdin\n" % (prog_name
, len(covered
)))
214 missing
= instrumented
- covered
215 sys
.stderr
.write("%s: %d PCs missing from coverage\n" % (prog_name
, len(missing
)))
216 if (len(missing
) > len(instrumented
) - len(covered
)):
218 "%s: WARNING: stdin contains PCs not found in binary\n" % prog_name
220 for pc
in sorted(missing
):
223 if __name__
== '__main__':
224 prog_name
= sys
.argv
[0]
225 if len(sys
.argv
) <= 2:
228 if sys
.argv
[1] == "missing":
229 if len(sys
.argv
) != 3:
231 PrintMissing(sys
.argv
[2])
235 for f
in sys
.argv
[2:]:
236 file_list
+= glob
.glob(f
)
240 if sys
.argv
[1] == "print":
241 PrintFiles(file_list
)
242 elif sys
.argv
[1] == "merge":
243 MergeAndPrint(file_list
)
244 elif sys
.argv
[1] == "unpack":
246 elif sys
.argv
[1] == "rawunpack":