[flang][cuda] Adapt ExternalNameConversion to work in gpu module (#117039)
[llvm-project.git] / compiler-rt / lib / sanitizer_common / scripts / sancov.py
blob31055086350abad21ad57de8e0c6cc2af44b6fa0
1 #!/usr/bin/env python
# Merge or print the coverage data collected by asan's coverage.
# Input files start with an 8-byte magic header that encodes the PC width,
# followed by a sequence of 32-bit or 64-bit integers (the covered PCs).
# We need to merge these integers into a set and then
# either print them (as hex) or dump them into another file.
6 import array
7 import bisect
8 import glob
9 import os.path
10 import struct
11 import subprocess
12 import sys
# Name used as the prefix of every diagnostic written to stderr. It is empty
# on import and is set to sys.argv[0] when the file runs as a script.
prog_name = ""
def Usage():
    """Write the command-line synopsis to stderr and exit with status 1.

    This function never returns: it always raises SystemExit(1).
    """
    synopsis = [
        "Usage: \n",
        " " + prog_name + " merge FILE [FILE...] > OUTPUT\n",
        " " + prog_name + " print FILE [FILE...]\n",
        " " + prog_name + " unpack FILE [FILE...]\n",
        " " + prog_name + " rawunpack FILE [FILE ...]\n",
        " " + prog_name + " missing BINARY < LIST_OF_PCS\n",
        "\n",
    ]
    sys.stderr.write("".join(synopsis))
    # Use sys.exit() rather than the bare exit() helper: the latter is
    # injected by the site module for interactive use and is not defined
    # when the interpreter is started with -S.
    sys.exit(1)
def CheckBits(bits):
    """Raise Exception unless *bits* is one of the supported PC widths.

    Only 32 and 64 are accepted; any other value is reported in the
    exception message.
    """
    if bits not in (32, 64):
        raise Exception("Wrong bitness: %d" % bits)
def TypeCodeForBits(bits):
    """Return the one-letter type code "L" (64-bit) or "I" (32-bit).

    Raises Exception (via CheckBits) for any other width.

    NOTE(review): nothing visible in this file calls this helper; the letters
    look like `array` module type codes -- confirm the intended item size
    before relying on it.
    """
    CheckBits(bits)
    if bits == 64:
        return "L"
    return "I"
def TypeCodeForStruct(bits):
    """Return the `struct` format character used for one PC of width *bits*.

    "Q" is returned for 64-bit PCs and "I" for 32-bit PCs. Raises Exception
    (via CheckBits) for any other width.
    """
    CheckBits(bits)
    if bits == 64:
        return "Q"
    return "I"
# The 8-byte file header is handled as two 32-bit words. One word is always
# kMagicFirstHalf; the other is one of the "second half" values, whose low
# byte (0x32 / 0x64) distinguishes files holding 32-bit PCs from files
# holding 64-bit PCs.
kMagicFirstHalf = 0xC0BFFFFF
kMagic64SecondHalf = 0xFFFFFF64
kMagic32SecondHalf = 0xFFFFFF32
def MagicForBits(bits):
    """Return the file magic for *bits*-wide PCs as two 32-bit words.

    The words are listed in the order they must be written on this host:
    on a little-endian machine the "second half" word comes first, on a
    big-endian machine the "first half" word comes first.

    Raises Exception (via CheckBits) if *bits* is neither 32 nor 64.
    """
    CheckBits(bits)
    if bits == 64:
        second_half = kMagic64SecondHalf
    else:
        second_half = kMagic32SecondHalf
    words = [kMagicFirstHalf, second_half]
    if sys.byteorder == "little":
        words.reverse()
    return words
def ReadMagicAndReturnBitness(f, path):
    """Consume the 8-byte magic header from *f* and return the PC width.

    *f* is a binary file object positioned at the start of the header;
    *path* is only used in the error message.

    Returns 32 or 64. Raises Exception when the header does not match either
    known magic value.
    """
    word0, word1 = struct.unpack("II", f.read(8))
    # On little-endian hosts the "first half" constant is stored in the
    # second word, so swap to get (first_half, second_half) in both cases.
    if sys.byteorder == "little":
        first_half, second_half = word1, word0
    else:
        first_half, second_half = word0, word1

    if first_half == kMagicFirstHalf:
        if second_half == kMagic64SecondHalf:
            return 64
        if second_half == kMagic32SecondHalf:
            return 32
    raise Exception("Bad magic word in %s" % path)
def ReadOneFile(path):
    """Read one coverage file and return its PCs as a tuple of ints.

    The file must begin with the 8-byte magic header; the rest is decoded as
    consecutive 32-bit or 64-bit values depending on that header. Trailing
    bytes that do not fill a whole PC are ignored. A one-line summary is
    written to stderr.

    Raises Exception if the file is shorter than the header or the magic is
    not recognised.
    """
    with open(path, mode="rb") as f:
        # Measure the file by seeking to its end, then rewind.
        f.seek(0, os.SEEK_END)
        total_bytes = f.tell()
        f.seek(0, os.SEEK_SET)
        if total_bytes < 8:
            raise Exception("File %s is short (< 8 bytes)" % path)
        bits = ReadMagicAndReturnBitness(f, path)
        payload_bytes = total_bytes - 8
        pc_count = payload_bytes * 8 // bits
        layout = TypeCodeForStruct(bits) * pc_count
        pcs = struct.unpack_from(layout, f.read(payload_bytes))
        sys.stderr.write(
            "%s: read %d %d-bit PCs from %s\n" % (prog_name, pc_count, bits, path)
        )
        return pcs
def Merge(files):
    """Return the sorted union of the PCs stored in all of *files*.

    Each path is read with ReadOneFile; duplicates across (and within) files
    are collapsed. A summary line is written to stderr.
    """
    merged = set()
    for path in files:
        # Grow the set in place; building a fresh union for every file would
        # re-copy everything accumulated so far.
        merged.update(ReadOneFile(path))
    sys.stderr.write(
        "%s: %d files merged; %d PCs total\n" % (prog_name, len(files), len(merged))
    )
    return sorted(merged)
def PrintFiles(files):
    """Print every PC found in *files* to stdout, one "0x..." value per line.

    With several files the PCs are merged (deduplicated and sorted) first.
    With a single file they are printed in the order stored in that file.
    """
    if len(files) <= 1:
        # If there is just one file, print the PCs in file order.
        pcs = ReadOneFile(files[0])
        sys.stderr.write(
            "%s: 1 file merged; %d PCs total\n" % (prog_name, len(pcs))
        )
    else:
        pcs = Merge(files)
    for pc in pcs:
        print("0x%x" % pc)
def MergeAndPrint(files):
    """Merge *files* and write the result to stdout in binary coverage format.

    The output is the magic header followed by the merged PCs. 64-bit PCs are
    used only if at least one merged PC does not fit in 32 bits. If stdout is
    a terminal the usage text is shown instead (Usage() exits).
    """
    if sys.stdout.isatty():
        Usage()
    pcs = Merge(files)
    # Guard the max() call: when the inputs contain no PCs at all the merged
    # list is empty, and a header-only 32-bit file is written.
    bits = 64 if pcs and max(pcs) > 0xFFFFFFFF else 32
    # Binary data must go to the underlying byte stream when one exists;
    # fall back to sys.stdout itself when there is no .buffer attribute.
    stdout_buf = getattr(sys.stdout, "buffer", sys.stdout)
    array.array("I", MagicForBits(bits)).tofile(stdout_buf)
    payload = struct.pack(TypeCodeForStruct(bits) * len(pcs), *pcs)
    stdout_buf.write(payload)
def UnpackOneFile(path):
    """Split a packed coverage file into per-module, per-pid .sancov files.

    The packed file is a sequence of records. Each record is a 12-byte header
    (struct format "iII": pid, module-name length in bytes, blob size in
    bytes) followed by the UTF-8 module name and then the blob. Every blob is
    appended to "<module>.<pid>.sancov".

    Raises Exception if the file ends in the middle of a record.
    """
    with open(path, mode="rb") as f:
        sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
        while True:
            header = f.read(12)
            if not header:
                # Clean end of file on a record boundary.
                return
            if len(header) < 12:
                raise Exception("Error reading file %s" % path)
            pid, module_length, blob_size = struct.unpack("iII", header)
            module_bytes = f.read(module_length)
            blob = f.read(blob_size)
            # module_length counts bytes, so validate it before decoding: a
            # decoded non-ASCII name has fewer characters than bytes.
            if len(module_bytes) != module_length or len(blob) != blob_size:
                raise Exception("Error reading file %s" % path)
            module = module_bytes.decode("utf-8")
            extracted_file = "%s.%d.sancov" % (module, pid)
            sys.stderr.write("%s: extracting %s\n" % (prog_name, extracted_file))
            # The packed file may contain multiple blobs for the same
            # pid/module pair. Append to the end of the file instead of
            # overwriting.
            with open(extracted_file, "ab") as f2:
                f2.write(blob)
def Unpack(files):
    """Run UnpackOneFile on every path in *files*, in order."""
    for packed_path in files:
        UnpackOneFile(packed_path)
def UnpackOneRawFile(path, map_path):
    """Convert a raw PC dump into per-module coverage files.

    *map_path* is a text file whose first line is the PC width (32 or 64).
    Every following line holds three hexadecimal numbers (start, end, base)
    and a module path. *path* is the raw dump: a plain sequence of PCs of
    that width with no header.

    Each non-zero PC is assigned to the mapping whose [start, end) range
    contains it and stored as (pc - base). PCs outside every mapping are
    reported on stderr and dropped. For every module with at least one PC,
    the magic header and the sorted offsets are written to
    "<module_path>.<basename of path without its last 4 characters>".

    Raises Exception if the width in the map file is not 32 or 64.
    """
    mem_map = []
    with open(map_path, mode="rt") as f_map:
        sys.stderr.write("%s: reading map %s\n" % (prog_name, map_path))
        bits = int(f_map.readline())
        if bits != 32 and bits != 64:
            raise Exception("Wrong bits size in the map")
        for line in f_map:
            parts = line.rstrip().split()
            if not parts:
                # Tolerate blank lines instead of failing on parts[0].
                continue
            mem_map.append(
                (
                    int(parts[0], 16),
                    int(parts[1], 16),
                    int(parts[2], 16),
                    " ".join(parts[3:]),
                )
            )
    mem_map.sort(key=lambda m: m[0])
    mem_map_keys = [m[0] for m in mem_map]

    with open(path, mode="rb") as f:
        sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))

        f.seek(0, 2)
        size = f.tell()
        f.seek(0, 0)
        pcs = struct.unpack_from(
            TypeCodeForStruct(bits) * (size * 8 // bits), f.read(size)
        )

    mem_map_pcs = [[] for _ in mem_map]
    for pc in pcs:
        if pc == 0:
            continue
        # Index of the last mapping whose start is <= pc. This is -1 when pc
        # lies below every mapping (or the map is empty); such a PC must not
        # wrap around to the last mapping via negative indexing.
        map_idx = bisect.bisect(mem_map_keys, pc) - 1
        if map_idx < 0 or pc >= mem_map[map_idx][1]:
            sys.stderr.write(
                "warning: %s: pc %x outside of any known mapping\n"
                % (prog_name, pc)
            )
            continue
        mem_map_pcs[map_idx].append(pc - mem_map[map_idx][2])

    for (_start, _end, _base, module_path), pc_list in zip(mem_map, mem_map_pcs):
        if not pc_list:
            continue
        assert path.endswith(".sancov.raw")
        # Strip the trailing ".raw" so the output ends in ".sancov".
        dst_path = module_path + "." + os.path.basename(path)[:-4]
        sys.stderr.write(
            "%s: writing %d PCs to %s\n" % (prog_name, len(pc_list), dst_path)
        )
        pc_buffer = struct.pack(
            TypeCodeForStruct(bits) * len(pc_list), *sorted(pc_list)
        )
        # NOTE(review): the file is opened in append mode, so if dst_path
        # already exists a second magic header is appended after its current
        # contents -- confirm that this is intended.
        with open(dst_path, "ab+") as f2:
            array.array("I", MagicForBits(bits)).tofile(f2)
            f2.seek(0, 2)
            f2.write(pc_buffer)
def RawUnpack(files):
    """Unpack every raw dump in *files* using its sibling map file.

    Each path must end in ".sancov.raw"; the matching map file is the same
    path with the trailing "raw" replaced by "map". Raises Exception for a
    path with any other suffix.
    """
    for raw_path in files:
        if not raw_path.endswith(".sancov.raw"):
            raise Exception("Unexpected raw file name %s" % raw_path)
        map_path = raw_path[: -len("raw")] + "map"
        UnpackOneRawFile(raw_path, map_path)
def GetInstrumentedPCs(binary):
    """Return the set of PCs in *binary* that follow a coverage callback call.

    The binary is disassembled with objdump and every instruction that
    - calls __sanitizer_cov(), __sanitizer_cov_with_check() or
      __sanitizer_cov_trace_pc_guard(),
    - with call or callq,
    - directly or via PLT
    contributes one PC.

    objdump is started with an argument list rather than a shell pipeline, so
    *binary* is passed through verbatim (spaces or shell metacharacters in the
    path are harmless). A binary with no matching calls yields an empty set.

    Raises subprocess.CalledProcessError if objdump exits with an error.
    """
    import re  # Local import: this is the only function that needs it.

    call_re = re.compile(
        br"^\s+([0-9a-f]+):\s+callq?\s+(?:0x)?[0-9a-f]+ "
        br"<__sanitizer_cov(?:_with_check|_trace_pc_guard)?(?:@plt)?>"
    )
    # stdin is kept as a separate pipe, as before, so that the child process
    # does not share this process's standard input.
    disassembly = subprocess.check_output(
        ["objdump", "--no-show-raw-insn", "-d", binary], stdin=subprocess.PIPE
    )
    pcs = set()
    for line in disassembly.splitlines():
        match = call_re.match(line)
        if match:
            # The PCs we get from objdump are off by 4 bytes, as they point
            # to the beginning of the callq instruction. Empirically this is
            # true on x86 and x86_64.
            pcs.add(int(match.group(1), 16) + 4)
    return pcs
def PrintMissing(binary):
    """Print the instrumented PCs of *binary* that are absent from stdin.

    Standard input supplies the covered PCs as hexadecimal numbers, one per
    line; blank lines are ignored. The missing PCs are printed to stdout in
    ascending order as "0x..." values, and progress is reported on stderr.

    Raises Exception if *binary* is not an existing file, and ValueError if a
    non-blank input line is not a hexadecimal number.
    """
    if not os.path.isfile(binary):
        raise Exception("File not found: %s" % binary)
    instrumented = GetInstrumentedPCs(binary)
    sys.stderr.write(
        "%s: found %d instrumented PCs in %s\n" % (prog_name, len(instrumented), binary)
    )
    # Skip blank lines (e.g. a trailing empty line) instead of crashing in
    # int().
    covered = set(int(line, 16) for line in sys.stdin if line.strip())
    sys.stderr.write("%s: read %d PCs from stdin\n" % (prog_name, len(covered)))
    missing = instrumented - covered
    sys.stderr.write("%s: %d PCs missing from coverage\n" % (prog_name, len(missing)))
    # Any covered PC that is not instrumented means the list does not
    # (entirely) belong to this binary.
    if covered - instrumented:
        sys.stderr.write(
            "%s: WARNING: stdin contains PCs not found in binary\n" % prog_name
        )
    for pc in sorted(missing):
        print("0x%x" % pc)
if __name__ == "__main__":
    # Command-line entry point: sancov.py COMMAND ARG [ARG...]
    prog_name = sys.argv[0]
    if len(sys.argv) <= 2:
        Usage()

    command = sys.argv[1]

    # "missing" takes exactly one binary and reads the PC list from stdin.
    if command == "missing":
        if len(sys.argv) != 3:
            Usage()
        PrintMissing(sys.argv[2])
        # sys.exit() instead of the site-provided exit() helper, which is not
        # defined when the interpreter is started with -S.
        sys.exit(0)

    # Every other command takes files. Each argument is expanded as a glob
    # pattern; an argument that matches nothing contributes no files.
    file_list = []
    for pattern in sys.argv[2:]:
        file_list += glob.glob(pattern)
    if not file_list:
        Usage()

    handlers = {
        "print": PrintFiles,
        "merge": MergeAndPrint,
        "unpack": Unpack,
        "rawunpack": RawUnpack,
    }
    if command not in handlers:
        Usage()  # Exits; does not return.
    handlers[command](file_list)