Fix bug in load time stats.
[chromium-blink-merge.git] / tools / valgrind / memcheck_analyze.py
blob4b6ce300d1c734218c938d28b5ed1cfd46ca01e5
1 #!/usr/bin/env python
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 # memcheck_analyze.py
8 ''' Given a valgrind XML file, parses errors and uniques them.'''
10 import gdb_helper
12 from collections import defaultdict
13 import hashlib
14 import logging
15 import optparse
16 import os
17 import re
18 import subprocess
19 import sys
20 import time
21 from xml.dom.minidom import parse
22 from xml.parsers.expat import ExpatError
24 import common
# Global symbol table (yuck): module-level mutable state shared by
# gatherFrames() and ValgrindError.__str__ for gdb-based address resolution.
# It is (re)assigned by MemcheckAnalyzer.Report().
TheAddressTable = None

# These are regexps that define functions (using C++ mangled names)
# we don't want to see in stack traces while pretty printing
# or generating suppressions.
# Just stop printing the stack/suppression frames when the current one
# matches any of these.
_BORING_CALLERS = common.BoringCallers(mangled=True, use_re_wildcards=True)
def getTextOf(top_node, name):
  ''' Returns all text in all DOM nodes with a certain |name| that are children
  of |top_node|.
  '''
  pieces = []
  for named_node in top_node.getElementsByTagName(name):
    for child in named_node.childNodes:
      if child.nodeType == child.TEXT_NODE:
        pieces.append(child.data)
  return "".join(pieces)
def getCDATAOf(top_node, name):
  ''' Returns all CDATA in all DOM nodes with a certain |name| that are children
  of |top_node|, or None if there is no CDATA at all.
  '''
  chunks = []
  for named_node in top_node.getElementsByTagName(name):
    for child in named_node.childNodes:
      if child.nodeType == child.CDATA_SECTION_NODE:
        chunks.append(child.data)
  text = "".join(chunks)
  if text == "":
    return None
  return text
def shortenFilePath(source_dir, directory):
  '''Returns a string with the string prefix |source_dir| removed from
  |directory|.

  A couple of well-known build-tree prefixes are always stripped as well;
  the *last* occurrence of each prefix wins (rfind).
  '''
  prefixes_to_cut = ["build/src/", "valgrind/coregrind/"]
  if source_dir:
    prefixes_to_cut.append(source_dir)

  for prefix in prefixes_to_cut:
    index = directory.rfind(prefix)
    if index != -1:
      directory = directory[index + len(prefix):]

  return directory
# Constants that give real names to the abbreviations in valgrind XML output.
INSTRUCTION_POINTER = "ip"
OBJECT_FILE = "obj"
FUNCTION_NAME = "fn"
SRC_FILE_DIR = "dir"
SRC_FILE_NAME = "file"
SRC_LINE = "line"
def gatherFrames(node, source_dir):
  '''Extracts a list of stack-frame dicts from the <frame> children of |node|.

  Each dict is keyed by the valgrind XML abbreviation constants above.
  |source_dir| is stripped from the <dir> entries. Gathering stops at the
  first "boring" caller; frames lacking a source line are queued in the
  global address table for later gdb resolution (when enabled).
  '''
  frames = []
  for frame in node.getElementsByTagName("frame"):
    frame_dict = {
      INSTRUCTION_POINTER : getTextOf(frame, INSTRUCTION_POINTER),
      OBJECT_FILE         : getTextOf(frame, OBJECT_FILE),
      FUNCTION_NAME       : getTextOf(frame, FUNCTION_NAME),
      SRC_FILE_DIR        : shortenFilePath(
          source_dir, getTextOf(frame, SRC_FILE_DIR)),
      SRC_FILE_NAME       : getTextOf(frame, SRC_FILE_NAME),
      SRC_LINE            : getTextOf(frame, SRC_LINE)
    }

    # Ignore this frame and all the following if it's a "boring" function.
    if any(re.match("^%s$" % regexp, frame_dict[FUNCTION_NAME])
           for regexp in _BORING_CALLERS):
      break

    frames += [frame_dict]

    global TheAddressTable
    if TheAddressTable != None and frame_dict[SRC_LINE] == "":
      # No debug info for this frame; try resolving it via gdb later.
      TheAddressTable.Add(frame_dict[OBJECT_FILE],
                          frame_dict[INSTRUCTION_POINTER])
  return frames
class ValgrindError:
  ''' Takes a <DOM Element: error> node and reads all the data from it. A
  ValgrindError is immutable and is hashed on its pretty printed output.
  '''

  def __init__(self, source_dir, error_node, commandline, testcase):
    ''' Copies all the relevant information out of the DOM and into object
    properties.

    Args:
      error_node: The <error></error> DOM node we're extracting from.
      source_dir: Prefix that should be stripped from the <dir> node.
      commandline: The command that was run under valgrind
      testcase: The test case name, if known.
    '''

    # Valgrind errors contain one <what><stack> pair, plus an optional
    # <auxwhat><stack> pair, plus an optional <origin><what><stack></origin>,
    # plus (since 3.5.0) a <suppression></suppression> pair.
    # (Origin is nicely enclosed; too bad the other two aren't.)
    # The most common way to see all three in one report is
    # a syscall with a parameter that points to uninitialized memory, e.g.
    # Format:
    # <error>
    #   <unique>0x6d</unique>
    #   <tid>1</tid>
    #   <kind>SyscallParam</kind>
    #   <what>Syscall param write(buf) points to uninitialised byte(s)</what>
    #   <stack>
    #     <frame>
    #     ...
    #     </frame>
    #   </stack>
    #   <auxwhat>Address 0x5c9af4f is 7 bytes inside a block of ...</auxwhat>
    #   <stack>
    #     <frame>
    #     ...
    #     </frame>
    #   </stack>
    #   <origin>
    #     <what>Uninitialised value was created by a heap allocation</what>
    #     <stack>
    #       <frame>
    #       ...
    #       </frame>
    #     </stack>
    #   </origin>
    #   <suppression>
    #     <sname>insert_a_suppression_name_here</sname>
    #     <skind>Memcheck:Param</skind>
    #     <skaux>write(buf)</skaux>
    #     <sframe> <fun>__write_nocancel</fun> </sframe>
    #     ...
    #     <sframe> <fun>main</fun> </sframe>
    #     <rawtext>
    # <![CDATA[
    #   <insert_a_suppression_name_here>
    #   Memcheck:Param
    #   write(buf)
    #   fun:__write_nocancel
    #   ...
    #   fun:main
    # ]]>
    #     </rawtext>
    #   </suppression>
    # </error>
    #
    # Each frame looks like this:
    #  <frame>
    #    <ip>0x83751BC</ip>
    #    <obj>/data/dkegel/chrome-build/src/out/Release/base_unittests</obj>
    #    <fn>_ZN7testing8internal12TestInfoImpl7RunTestEPNS_8TestInfoE</fn>
    #    <dir>/data/dkegel/chrome-build/src/testing/gtest/src</dir>
    #    <file>gtest-internal-inl.h</file>
    #    <line>655</line>
    #  </frame>
    # although the dir, file, and line elements are missing if there is
    # no debug info.

    self._kind = getTextOf(error_node, "kind")
    self._backtraces = []
    self._suppression = None
    self._commandline = commandline
    self._testcase = testcase
    self._additional = []

    # Iterate through the nodes, parsing <what|auxwhat><stack> pairs.
    description = None
    for node in error_node.childNodes:
      if node.localName == "what" or node.localName == "auxwhat":
        description = "".join([n.data for n in node.childNodes
                               if n.nodeType == n.TEXT_NODE])
      elif node.localName == "xwhat":
        description = getTextOf(node, "text")
      elif node.localName == "stack":
        assert description
        self._backtraces.append([description, gatherFrames(node, source_dir)])
        description = None
      elif node.localName == "origin":
        description = getTextOf(node, "what")
        stack = node.getElementsByTagName("stack")[0]
        self._backtraces.append([description,
                                 gatherFrames(stack, source_dir)])
        description = None
      elif description and node.localName != None:
        # The latest description has no stack, e.g. "Address 0x28 is unknown"
        self._additional.append(description)
        description = None

      if node.localName == "suppression":
        self._suppression = getCDATAOf(node, "rawtext")

  def __str__(self):
    ''' Pretty print the type and backtrace(s) of this specific error,
    including suppression (which is just a mangled backtrace).'''
    global TheAddressTable
    output = ""
    if self._commandline:
      output += self._commandline + "\n"

    output += self._kind + "\n"
    for backtrace in self._backtraces:
      output += backtrace[0] + "\n"
      # Demangle all the function names in one c++filt invocation.
      demangler = subprocess.Popen("c++filt -n", stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT,
                                   shell=True,
                                   close_fds=True)
      mangled = ""
      for frame in backtrace[1]:
        mangled += (frame[FUNCTION_NAME] or frame[INSTRUCTION_POINTER]) + "\n"
      (stdoutbuf, stderrbuf) = demangler.communicate(mangled.encode('latin-1'))
      demangled_names = stdoutbuf.split("\n")

      # c++filt emits one output line per input line, so zip pairs each frame
      # with its demangled name (the trailing empty split entry is ignored).
      for frame, demangled in zip(backtrace[1], demangled_names):
        output += (" " + demangled)

        if TheAddressTable != None and frame[SRC_FILE_DIR] == "":
          # Try using gdb
          file_line = TheAddressTable.GetFileLine(frame[OBJECT_FILE],
                                                  frame[INSTRUCTION_POINTER])
          if file_line[0] != None:
            output += (" (" + file_line[0] + ":" + file_line[1] + ")")
        elif frame[SRC_FILE_DIR] != "":
          output += (" (" + frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME] +
                     ":" + frame[SRC_LINE] + ")")
        else:
          output += " (" + frame[OBJECT_FILE] + ")"
        output += "\n"

    for additional in self._additional:
      output += additional + "\n"

    assert self._suppression != None, "Your Valgrind doesn't generate " \
           "suppressions - is it too old?"

    if self._testcase:
      output += "The report came from the `%s` test.\n" % self._testcase
    output += "Suppression (error hash=#%016X#):\n" % self.ErrorHash()
    output += (" For more info on using suppressions see "
        "http://dev.chromium.org/developers/tree-sheriffs/sheriff-details-chromium/memory-sheriff#TOC-Suppressing-memory-reports")

    # Widen suppression slightly to make portable between mac and linux
    # TODO(timurrrr): Oops, these transformations should happen
    # BEFORE calculating the hash!
    supp = self._suppression
    for new_mangling, wildcard in (("fun:_Znwj", "fun:_Znw*"),
                                   ("fun:_Znwm", "fun:_Znw*"),
                                   ("fun:_Znaj", "fun:_Zna*"),
                                   ("fun:_Znam", "fun:_Zna*")):
      supp = supp.replace(new_mangling, wildcard)

    # Make suppressions even less platform-dependent.
    for sz in [1, 2, 4, 8]:
      supp = supp.replace("Memcheck:Addr%d" % sz, "Memcheck:Unaddressable")
      supp = supp.replace("Memcheck:Value%d" % sz, "Memcheck:Uninitialized")
    supp = supp.replace("Memcheck:Cond", "Memcheck:Uninitialized")

    # Split into lines so we can enforce length limits
    supplines = supp.split("\n")
    supp = None  # to avoid re-use

    # Truncate at line 26 (VG_MAX_SUPP_CALLERS plus 2 for name and type)
    # or at the first 'boring' caller.
    # (https://bugs.kde.org/show_bug.cgi?id=199468 proposes raising
    # VG_MAX_SUPP_CALLERS, but we're probably fine with it as is.)
    newlen = min(26, len(supplines))

    # Drop boring frames and all the following.
    for frameno in range(newlen):
      if any(re.match("^ +fun:%s$" % boring_caller, supplines[frameno])
             for boring_caller in _BORING_CALLERS):
        newlen = frameno
        break
    if len(supplines) > newlen:
      supplines = supplines[0:newlen]
      supplines.append("}")

    # Replace the always-changing anonymous namespace prefix with "*".
    anon_re = re.compile("( +fun:)_ZN.*_GLOBAL__N_.*\.cc_" +
                         "[0-9a-fA-F]{8}_[0-9a-fA-F]{8}(.*)")
    for idx in range(len(supplines)):
      m = anon_re.match(supplines[idx])
      if m:
        supplines[idx] = "*".join(m.groups())

    output += "\n".join(supplines) + "\n"

    return output

  def UniqueString(self):
    ''' String to use for object identity. Don't print this, use str(obj)
    instead.'''
    pieces = [self._kind, " "]
    for backtrace in self._backtraces:
      for frame in backtrace[1]:
        pieces.append(frame[FUNCTION_NAME])

        if frame[SRC_FILE_DIR] != "":
          pieces.append(frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME])
        else:
          pieces.append(frame[OBJECT_FILE])

    return "".join(pieces)

  # This is a device-independent hash identifying the suppression.
  # By printing out this hash we can find duplicate reports between tests and
  # different shards running on multiple buildbots
  def ErrorHash(self):
    return int(hashlib.md5(self.UniqueString()).hexdigest()[:16], 16)

  def __hash__(self):
    return hash(self.UniqueString())

  def __eq__(self, rhs):
    # NOTE: rhs may be another ValgrindError or a plain string. Comparing
    # our string to a ValgrindError returns NotImplemented on the str side,
    # so Python falls back to rhs.__eq__, which compares the two
    # UniqueString()s — i.e. error == error still works as intended.
    return self.UniqueString() == rhs
def log_is_finished(f, force_finish):
  '''Returns True if the valgrind XML log in |f| is complete.

  Scans |f| from the beginning for a closing </valgrindoutput> tag and
  truncates any garbage valgrind wrote after it (common upon crash). If the
  tag is absent and |force_finish| is set, appends the closing tag when the
  log ends at a safe boundary (</error>, </errorcounts> or </status>) so the
  file becomes parseable anyway.
  '''
  f.seek(0)
  prev_line = ""
  line = f.readline()
  while line != "":
    if '</valgrindoutput>' in line:
      # Valgrind often has garbage after </valgrindoutput> upon crash.
      f.truncate()
      return True
    prev_line = line
    line = f.readline()

  # Reached EOF without seeing the closing tag.
  if not force_finish:
    return False
  # Okay, the log is not finished but we can make it up to be parseable:
  if prev_line.strip() in ["</error>", "</errorcounts>", "</status>"]:
    f.write("</valgrindoutput>\n")
    return True
  return False
class MemcheckAnalyzer:
  ''' Given a set of Valgrind XML files, parse all the errors out of them,
  unique them and output the results.'''

  SANITY_TEST_SUPPRESSIONS = {
      "Memcheck sanity test 01 (memory leak).": 1,
      "Memcheck sanity test 02 (malloc/read left).": 1,
      "Memcheck sanity test 03 (malloc/read right).": 1,
      "Memcheck sanity test 04 (malloc/write left).": 1,
      "Memcheck sanity test 05 (malloc/write right).": 1,
      "Memcheck sanity test 06 (new/read left).": 1,
      "Memcheck sanity test 07 (new/read right).": 1,
      "Memcheck sanity test 08 (new/write left).": 1,
      "Memcheck sanity test 09 (new/write right).": 1,
      "Memcheck sanity test 10 (write after free).": 1,
      "Memcheck sanity test 11 (write after delete).": 1,
      "Memcheck sanity test 12 (array deleted without []).": 1,
      "Memcheck sanity test 13 (single element deleted with []).": 1,
      "Memcheck sanity test 14 (malloc/read uninit).": 1,
      "Memcheck sanity test 15 (new/read uninit).": 1,
  }

  # Max time to wait for memcheck logs to complete.
  LOG_COMPLETION_TIMEOUT = 180.0

  def __init__(self, source_dir, show_all_leaks=False, use_gdb=False):
    '''Create a parser for Memcheck logs.

    Args:
      source_dir: Path to top of source tree for this build
      show_all_leaks: Whether to show even less important leaks
      use_gdb: Whether to use gdb to resolve source filenames and line numbers
               in the report stacktraces
    '''
    self._source_dir = source_dir
    self._show_all_leaks = show_all_leaks
    self._use_gdb = use_gdb

    # Contains the set of unique errors
    self._errors = set()

    # Contains the time when the we started analyzing the first log file.
    # This variable is used to skip incomplete logs after some timeout.
    self._analyze_start_time = None

  def Report(self, files, testcase, check_sanity=False):
    '''Reads in a set of files and prints Memcheck report.

    Args:
      files: A list of filenames.
      testcase: The test case name, if known.
      check_sanity: if true, search for SANITY_TEST_SUPPRESSIONS

    Returns:
      0 on success, -1 if errors were found, -2 on parse failure,
      -3 if the sanity check failed.
    '''
    # Beyond the detailed errors parsed by ValgrindError above,
    # the xml file contain records describing suppressions that were used:
    # <suppcounts>
    #  <pair>
    #    <count>28</count>
    #    <name>pango_font_leak_todo</name>
    #  </pair>
    #  <pair>
    #    <count>378</count>
    #    <name>bug_13243</name>
    #  </pair>
    # </suppcounts>
    # Collect these and print them at the end.
    #
    # With our patch for https://bugs.kde.org/show_bug.cgi?id=205000 in,
    # the file also includes records of the form
    # <load_obj><obj>/usr/lib/libgcc_s.1.dylib</obj><ip>0x27000</ip></load_obj>
    # giving the filename and load address of each binary that was mapped
    # into the process.

    global TheAddressTable
    if self._use_gdb:
      TheAddressTable = gdb_helper.AddressTable()
    else:
      TheAddressTable = None
    cur_report_errors = set()
    suppcounts = defaultdict(int)
    badfiles = set()

    if self._analyze_start_time == None:
      self._analyze_start_time = time.time()
    start_time = self._analyze_start_time

    parse_failed = False
    for filename in files:
      # Wait up to three minutes for valgrind to finish writing all files,
      # but after that, just skip incomplete files and warn.
      f = open(filename, "r+")
      pid = re.match(".*\.([0-9]+)$", filename)
      if pid:
        pid = pid.groups()[0]
      found = False
      running = True
      firstrun = True
      skip = False
      origsize = os.path.getsize(filename)
      while (running and not found and not skip and
             (firstrun or
              ((time.time() - start_time) < self.LOG_COMPLETION_TIMEOUT))):
        firstrun = False
        f.seek(0)
        if pid:
          # Make sure the process is still running so we don't wait for
          # 3 minutes if it was killed. See http://crbug.com/17453
          ps_out = subprocess.Popen("ps p %s" % pid, shell=True,
                                    stdout=subprocess.PIPE).stdout
          if len(ps_out.readlines()) < 2:
            running = False
        else:
          skip = True
          running = False
        found = log_is_finished(f, False)
        if not running and not found:
          logging.warn("Valgrind process PID = %s is not running but its "
                       "XML log has not been finished correctly.\nMake it up"
                       "by adding some closing tags manually." % pid)
          found = log_is_finished(f, not running)
        if running and not found:
          time.sleep(1)
      f.close()
      if not found:
        badfiles.add(filename)
      else:
        newsize = os.path.getsize(filename)
        if origsize > newsize+1:
          logging.warn(str(origsize - newsize) +
                       " bytes of junk were after </valgrindoutput> in %s!" %
                       filename)
        try:
          parsed_file = parse(filename)
        except ExpatError as e:
          # Print a few lines of context around the parse error to ease
          # debugging the broken XML.
          parse_failed = True
          logging.warn("could not parse %s: %s" % (filename, e))
          lineno = e.lineno - 1
          context_lines = 5
          context_start = max(0, lineno - context_lines)
          context_end = lineno + context_lines + 1
          context_file = open(filename, "r")
          for i in range(0, context_start):
            context_file.readline()
          for i in range(context_start, context_end):
            context_data = context_file.readline().rstrip()
            if i != lineno:
              logging.warn(" %s" % context_data)
            else:
              logging.warn("> %s" % context_data)
          context_file.close()
          continue
        if TheAddressTable != None:
          load_objs = parsed_file.getElementsByTagName("load_obj")
          for load_obj in load_objs:
            obj = getTextOf(load_obj, "obj")
            ip = getTextOf(load_obj, "ip")
            TheAddressTable.AddBinaryAt(obj, ip)

        commandline = None
        preamble = parsed_file.getElementsByTagName("preamble")[0]
        for node in preamble.getElementsByTagName("line"):
          if node.localName == "line":
            for x in node.childNodes:
              if x.nodeType == node.TEXT_NODE and "Command" in x.data:
                commandline = x.data
                break

        raw_errors = parsed_file.getElementsByTagName("error")
        for raw_error in raw_errors:
          # Ignore "possible" leaks for now by default.
          if (self._show_all_leaks or
              getTextOf(raw_error, "kind") != "Leak_PossiblyLost"):
            error = ValgrindError(self._source_dir,
                                  raw_error, commandline, testcase)
            if error not in cur_report_errors:
              # We haven't seen such errors doing this report yet...
              if error in self._errors:
                # ... but we saw it in earlier reports, e.g. previous UI test
                cur_report_errors.add("This error was already printed in "
                                      "some other test, see 'hash=#%016X#'" % \
                                      error.ErrorHash())
              else:
                # ... and we haven't seen it in other tests as well
                self._errors.add(error)
                cur_report_errors.add(error)

        suppcountlist = parsed_file.getElementsByTagName("suppcounts")
        if len(suppcountlist) > 0:
          suppcountlist = suppcountlist[0]
          for node in suppcountlist.getElementsByTagName("pair"):
            count = getTextOf(node, "count")
            name = getTextOf(node, "name")
            suppcounts[name] += int(count)

    if len(badfiles) > 0:
      logging.warn("valgrind didn't finish writing %d files?!" % len(badfiles))
      for filename in badfiles:
        logging.warn("Last 20 lines of %s :" % filename)
        os.system("tail -n 20 '%s' 1>&2" % filename)

    if parse_failed:
      logging.error("FAIL! Couldn't parse Valgrind output file")
      return -2

    common.PrintUsedSuppressionsList(suppcounts)

    retcode = 0
    if cur_report_errors:
      logging.error("FAIL! There were %s errors: " % len(cur_report_errors))

      if TheAddressTable != None:
        TheAddressTable.ResolveAll()

      for error in cur_report_errors:
        logging.error(error)

      retcode = -1

    # Report tool's insanity even if there were errors.
    if check_sanity:
      # Copy the dict: the previous code aliased the class attribute, so the
      # `del` below permanently removed entries from SANITY_TEST_SUPPRESSIONS
      # and corrupted every subsequent Report() call.
      remaining_sanity_supp = dict(MemcheckAnalyzer.SANITY_TEST_SUPPRESSIONS)
      for (name, count) in suppcounts.items():
        if (name in remaining_sanity_supp and
            remaining_sanity_supp[name] == count):
          del remaining_sanity_supp[name]
      if remaining_sanity_supp:
        logging.error("FAIL! Sanity check failed!")
        logging.info("The following test errors were not handled: ")
        for (name, count) in remaining_sanity_supp.items():
          logging.info(" * %dx %s" % (count, name))
        retcode = -3

    if retcode != 0:
      return retcode

    logging.info("PASS! No errors found!")
    return 0
def _main():
  '''For testing only. The MemcheckAnalyzer class should be imported instead.'''
  parser = optparse.OptionParser("usage: %prog [options] <files to analyze>")
  parser.add_option("", "--source_dir",
                    help="path to top of source tree for this build"
                         "(used to normalize source paths in baseline)")

  (options, args) = parser.parse_args()
  if len(args) == 0:
    parser.error("no filename specified")
  filenames = args

  analyzer = MemcheckAnalyzer(options.source_dir, use_gdb=True)
  return analyzer.Report(filenames, None)


if __name__ == "__main__":
  sys.exit(_main())