2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
8 ''' Given a valgrind XML file, parses errors and uniques them.'''
from collections import defaultdict
import hashlib
import logging
import optparse
import os
import re
import subprocess
import sys
import time
from xml.dom.minidom import parse
from xml.parsers.expat import ExpatError

import common
import gdb_helper
# Global symbol table (yuck).  Populated lazily by MemcheckAnalyzer.Report()
# when gdb-based symbolization is requested; None means "don't use gdb".
TheAddressTable = None
# These are regexps that define functions (using C++ mangled names)
# we don't want to see in stack traces while pretty printing
# or generating suppressions.
# Just stop printing the stack/suppression frames when the current one
# matches any of these.
_BORING_CALLERS = common.BoringCallers(mangled=True, use_re_wildcards=True)
def getTextOf(top_node, name):
  ''' Returns all text in all DOM nodes with a certain |name| that are children
  of |top_node|.

  Args:
    top_node: DOM node to search under.
    name: tag name whose text children are collected.

  Returns:
    The concatenated text ("" if no matching node has text children).
  '''
  text = ""
  for nodes_named in top_node.getElementsByTagName(name):
    text += "".join([node.data for node in nodes_named.childNodes
                     if node.nodeType == node.TEXT_NODE])
  return text
def getCDATAOf(top_node, name):
  ''' Returns all CDATA in all DOM nodes with a certain |name| that are
  children of |top_node|.

  Args:
    top_node: DOM node to search under.
    name: tag name whose CDATA children are collected.

  Returns:
    The concatenated CDATA, or None if no CDATA was found (callers use the
    None to detect Valgrind versions too old to emit <rawtext> suppressions).
  '''
  text = ""
  for nodes_named in top_node.getElementsByTagName(name):
    text += "".join([node.data for node in nodes_named.childNodes
                     if node.nodeType == node.CDATA_SECTION_NODE])
  if (len(text) == 0):
    return None
  return text
def shortenFilePath(source_dir, directory):
  '''Returns a string with the string prefix |source_dir| removed from
  |directory|.

  Also strips a few well-known build-tree prefixes so reports are comparable
  across checkouts and bots.
  '''
  prefixes_to_cut = ["build/src/", "valgrind/coregrind/", "out/Release/../../"]

  if source_dir:
    prefixes_to_cut.append(source_dir)

  for p in prefixes_to_cut:
    # rfind: cut at the LAST occurrence so nested checkouts shorten fully.
    index = directory.rfind(p)
    if index != -1:
      directory = directory[index + len(p):]

  return directory
# Constants that give real names to the abbreviations in valgrind XML output.
INSTRUCTION_POINTER = "ip"
OBJECT_FILE = "obj"
FUNCTION_NAME = "fn"
SRC_FILE_DIR = "dir"
SRC_FILE_NAME = "file"
SRC_LINE = "line"
def gatherFrames(node, source_dir):
  '''Extracts the stack frames from a <stack> DOM node.

  Args:
    node: a <stack> DOM element containing <frame> children.
    source_dir: prefix stripped from each frame's <dir> via shortenFilePath.

  Returns:
    A list of dicts keyed by the valgrind-XML constants above, truncated at
    the first "boring" frame.  Frames with no source line are also queued in
    TheAddressTable (if enabled) for later gdb resolution.
  '''
  frames = []
  for frame in node.getElementsByTagName("frame"):
    frame_dict = {
      INSTRUCTION_POINTER : getTextOf(frame, INSTRUCTION_POINTER),
      OBJECT_FILE         : getTextOf(frame, OBJECT_FILE),
      FUNCTION_NAME       : getTextOf(frame, FUNCTION_NAME),
      SRC_FILE_DIR        : shortenFilePath(
          source_dir, getTextOf(frame, SRC_FILE_DIR)),
      SRC_FILE_NAME       : getTextOf(frame, SRC_FILE_NAME),
      SRC_LINE            : getTextOf(frame, SRC_LINE)
    }

    # Ignore this frame and all the following if it's a "boring" function.
    enough_frames = False
    for regexp in _BORING_CALLERS:
      if re.match("^%s$" % regexp, frame_dict[FUNCTION_NAME]):
        enough_frames = True
        break
    if enough_frames:
      break

    frames += [frame_dict]

    global TheAddressTable
    if TheAddressTable != None and frame_dict[SRC_LINE] == "":
      # Try using gdb to resolve the file/line later.
      TheAddressTable.Add(frame_dict[OBJECT_FILE],
                          frame_dict[INSTRUCTION_POINTER])
  return frames
class ValgrindError:
  ''' Takes a <DOM Element: error> node and reads all the data from it. A
  ValgrindError is immutable and is hashed on its pretty printed output.
  '''

  def __init__(self, source_dir, error_node, commandline, testcase):
    ''' Copies all the relevant information out of the DOM and into object
    properties.

    Args:
      error_node: The <error></error> DOM node we're extracting from.
      source_dir: Prefix that should be stripped from the <dir> node.
      commandline: The command that was run under valgrind
      testcase: The test case name, if known.
    '''

    # Valgrind errors contain one <what><stack> pair, plus an optional
    # <auxwhat><stack> pair, plus an optional <origin><what><stack></origin>,
    # plus (since 3.5.0) a <suppression></suppression> pair.
    # (Origin is nicely enclosed; too bad the other two aren't.)
    # The most common way to see all three in one report is
    # a syscall with a parameter that points to uninitialized memory, e.g.:
    #
    # <error>
    #   <unique>0x6d</unique>
    #   <kind>SyscallParam</kind>
    #   <what>Syscall param write(buf) points to uninitialised byte(s)</what>
    #   <stack>...</stack>
    #   <auxwhat>Address 0x5c9af4f is 7 bytes inside a block of ...</auxwhat>
    #   <stack>...</stack>
    #   <origin>
    #     <what>Uninitialised value was created by a heap allocation</what>
    #     <stack>...</stack>
    #   </origin>
    #   <suppression>
    #     <sname>insert_a_suppression_name_here</sname>
    #     <skind>Memcheck:Param</skind>
    #     <skaux>write(buf)</skaux>
    #     <sframe> <fun>__write_nocancel</fun> </sframe>
    #     ...
    #     <sframe> <fun>main</fun> </sframe>
    #     <rawtext>
    #       {
    #         <insert_a_suppression_name_here>
    #         ...
    #         fun:__write_nocancel
    #         ...
    #       }
    #     </rawtext>
    #   </suppression>
    # </error>
    #
    # Each frame looks like this:
    #  <frame>
    #    <ip>0x83751BC</ip>
    #    <obj>/data/dkegel/chrome-build/src/out/Release/base_unittests</obj>
    #    <fn>_ZN7testing8internal12TestInfoImpl7RunTestEPNS_8TestInfoE</fn>
    #    <dir>/data/dkegel/chrome-build/src/testing/gtest/src</dir>
    #    <file>gtest-internal-inl.h</file>
    #    <line>655</line>
    #  </frame>
    # although the dir, file, and line elements are missing if there is
    # no debug info.

    self._kind = getTextOf(error_node, "kind")
    self._backtraces = []
    self._suppression = None
    self._commandline = commandline
    self._testcase = testcase
    self._additional = []

    # Iterate through the nodes, parsing <what|auxwhat><stack> pairs.
    description = None
    for node in error_node.childNodes:
      if node.localName == "what" or node.localName == "auxwhat":
        description = "".join([n.data for n in node.childNodes
                               if n.nodeType == n.TEXT_NODE])
      elif node.localName == "xwhat":
        description = getTextOf(node, "text")
      elif node.localName == "stack":
        self._backtraces.append([description, gatherFrames(node, source_dir)])
        description = None
      elif node.localName == "origin":
        description = getTextOf(node, "what")
        stack = node.getElementsByTagName("stack")[0]
        frames = gatherFrames(stack, source_dir)
        self._backtraces.append([description, frames])
        description = None
      elif description and node.localName != None:
        # The latest description has no stack, e.g. "Address 0x28 is unknown".
        self._additional.append(description)
        description = None

      if node.localName == "suppression":
        self._suppression = getCDATAOf(node, "rawtext")

  def __str__(self):
    ''' Pretty print the type and backtrace(s) of this specific error,
        including suppression (which is just a mangled backtrace).'''
    output = ""
    output += "\n" # Make sure the ### is at the beginning of line.
    output += "### BEGIN MEMORY TOOL REPORT (error hash=#%016X#)\n" % \
        self.ErrorHash()
    if (self._commandline):
      output += self._commandline + "\n"

    output += self._kind + "\n"
    for backtrace in self._backtraces:
      output += backtrace[0] + "\n"
      # Demangle all the function names in one c++filt invocation.
      filter = subprocess.Popen("c++filt -n", stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                shell=True,
                                close_fds=True)
      buf = ""
      for frame in backtrace[1]:
        buf += (frame[FUNCTION_NAME] or frame[INSTRUCTION_POINTER]) + "\n"
      (stdoutbuf, stderrbuf) = filter.communicate(buf.encode('latin-1'))
      demangled_names = stdoutbuf.split("\n")

      i = 0
      for frame in backtrace[1]:
        output += ("  " + demangled_names[i])
        i = i + 1

        global TheAddressTable
        if TheAddressTable != None and frame[SRC_FILE_DIR] == "":
          # Try using gdb to resolve the source location from the binary.
          foo = TheAddressTable.GetFileLine(frame[OBJECT_FILE],
                                            frame[INSTRUCTION_POINTER])
          if foo[0] != None:
            output += (" (" + foo[0] + ":" + foo[1] + ")")
        elif frame[SRC_FILE_DIR] != "":
          output += (" (" + frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME] +
                     ":" + frame[SRC_LINE] + ")")
        else:
          output += " (" + frame[OBJECT_FILE] + ")"
        output += "\n"

    for additional in self._additional:
      output += additional + "\n"

    assert self._suppression != None, "Your Valgrind doesn't generate " \
                                      "suppressions - is it too old?"

    if self._testcase:
      output += "The report came from the `%s` test.\n" % self._testcase
    output += "Suppression (error hash=#%016X#):\n" % self.ErrorHash()
    output += ("  For more info on using suppressions see "
        "http://dev.chromium.org/developers/tree-sheriffs/sheriff-details-chromium/memory-sheriff#TOC-Suppressing-memory-reports")

    # Widen suppression slightly to make portable between mac and linux
    # TODO(timurrrr): Oops, these transformations should happen
    # BEFORE calculating the hash!
    supp = self._suppression
    supp = supp.replace("fun:_Znwj", "fun:_Znw*")
    supp = supp.replace("fun:_Znwm", "fun:_Znw*")
    supp = supp.replace("fun:_Znaj", "fun:_Zna*")
    supp = supp.replace("fun:_Znam", "fun:_Zna*")

    # Make suppressions even less platform-dependent.
    for sz in [1, 2, 4, 8]:
      supp = supp.replace("Memcheck:Addr%d" % sz, "Memcheck:Unaddressable")
      supp = supp.replace("Memcheck:Value%d" % sz, "Memcheck:Uninitialized")
    supp = supp.replace("Memcheck:Cond", "Memcheck:Uninitialized")

    # Split into lines so we can enforce length limits.
    supplines = supp.split("\n")
    supp = None  # to avoid re-use

    # Truncate at line 26 (VG_MAX_SUPP_CALLERS plus 2 for name and type)
    # or at the first 'boring' caller.
    # (https://bugs.kde.org/show_bug.cgi?id=199468 proposes raising
    # VG_MAX_SUPP_CALLERS, but we're probably fine with it as is.)
    newlen = min(26, len(supplines))

    # Drop boring frames and all the following.
    enough_frames = False
    for frameno in range(newlen):
      for boring_caller in _BORING_CALLERS:
        if re.match("^ +fun:%s$" % boring_caller, supplines[frameno]):
          newlen = frameno
          enough_frames = True
          break
      if enough_frames:
        break
    if (len(supplines) > newlen):
      supplines = supplines[0:newlen]
      supplines.append("}")

    for frame in range(len(supplines)):
      # Replace the always-changing anonymous namespace prefix with "*".
      m = re.match(r"( +fun:)_ZN.*_GLOBAL__N_.*\.cc_" +
                   r"[0-9a-fA-F]{8}_[0-9a-fA-F]{8}(.*)",
                   supplines[frame])
      if m:
        supplines[frame] = "*".join(m.groups())

    output += "\n".join(supplines) + "\n"
    output += "### END MEMORY TOOL REPORT (error hash=#%016X#)\n" % \
        self.ErrorHash()

    return output

  def UniqueString(self):
    ''' String to use for object identity. Don't print this, use str(obj)
    instead.'''
    rep = self._kind + " "
    for backtrace in self._backtraces:
      for frame in backtrace[1]:
        rep += frame[FUNCTION_NAME]

        if frame[SRC_FILE_DIR] != "":
          rep += frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME]
        else:
          rep += frame[OBJECT_FILE]

    return rep

  # This is a device-independent hash identifying the suppression.
  # By printing out this hash we can find duplicate reports between tests and
  # different shards running on multiple buildbots.
  def ErrorHash(self):
    return int(hashlib.md5(self.UniqueString()).hexdigest()[:16], 16)

  def __hash__(self):
    return hash(self.UniqueString())

  def __eq__(self, rhs):
    return self.UniqueString() == rhs.UniqueString()
def log_is_finished(f, force_finish):
  '''Returns True if the valgrind XML log in file object |f| is complete.

  Scans |f| from the start for the closing </valgrindoutput> tag.  If the tag
  is found, everything after it is truncated (valgrind often leaves garbage
  there upon crash).  If |force_finish| is True and the log ends right after a
  well-known closing tag, the missing </valgrindoutput> is appended to make
  the log parseable.

  Args:
    f: a file object opened for reading and writing.
    force_finish: whether to patch up an almost-finished log.
  '''
  f.seek(0)
  prev_line = ""
  while True:
    line = f.readline()
    if line == "":
      if not force_finish:
        return False
      # Okay, the log is not finished but we can make it up to be parseable:
      if prev_line.strip() in ["</error>", "</errorcounts>", "</status>"]:
        f.write("</valgrindoutput>\n")
        return True
      return False
    if '</valgrindoutput>' in line:
      # Valgrind often has garbage after </valgrindoutput> upon crash.
      f.truncate()
      return True
    prev_line = line
class MemcheckAnalyzer:
  ''' Given a set of Valgrind XML files, parse all the errors out of them,
  unique them and output the results.'''

  # Expected suppression hit counts for the memcheck sanity tests; used when
  # Report() is called with check_sanity=True.
  SANITY_TEST_SUPPRESSIONS = {
      "Memcheck sanity test 01 (memory leak).": 1,
      "Memcheck sanity test 02 (malloc/read left).": 1,
      "Memcheck sanity test 03 (malloc/read right).": 1,
      "Memcheck sanity test 04 (malloc/write left).": 1,
      "Memcheck sanity test 05 (malloc/write right).": 1,
      "Memcheck sanity test 06 (new/read left).": 1,
      "Memcheck sanity test 07 (new/read right).": 1,
      "Memcheck sanity test 08 (new/write left).": 1,
      "Memcheck sanity test 09 (new/write right).": 1,
      "Memcheck sanity test 10 (write after free).": 1,
      "Memcheck sanity test 11 (write after delete).": 1,
      "Memcheck sanity test 12 (array deleted without []).": 1,
      "Memcheck sanity test 13 (single element deleted with []).": 1,
      "Memcheck sanity test 14 (malloc/read uninit).": 1,
      "Memcheck sanity test 15 (new/read uninit).": 1,
  }

  # Max time to wait for memcheck logs to complete.
  LOG_COMPLETION_TIMEOUT = 180.0

  def __init__(self, source_dir, show_all_leaks=False, use_gdb=False):
    '''Create a parser for Memcheck logs.

    Args:
      source_dir: Path to top of source tree for this build
      show_all_leaks: Whether to show even less important leaks
      use_gdb: Whether to use gdb to resolve source filenames and line numbers
               in the report stacktraces
    '''
    self._source_dir = source_dir
    self._show_all_leaks = show_all_leaks
    self._use_gdb = use_gdb

    # Contains the set of unique errors seen across all Report() calls.
    self._errors = set()

    # Contains the time when the we started analyzing the first log file.
    # This variable is used to skip incomplete logs after some timeout.
    self._analyze_start_time = None

  def Report(self, files, testcase, check_sanity=False):
    '''Reads in a set of files and prints Memcheck report.

    Args:
      files: A list of filenames.
      testcase: The test case name, if known.
      check_sanity: if true, search for SANITY_TEST_SUPPRESSIONS

    Returns:
      0 on success, -1 if errors were found, -2 if a log could not be
      parsed, -3 if the sanity check failed.
    '''
    # Beyond the detailed errors parsed by ValgrindError above,
    # the xml file contain records describing suppressions that were used:
    # <suppcounts>
    #  <pair>
    #    <count>28</count>
    #    <name>pango_font_leak_todo</name>
    #  </pair>
    #  <pair>
    #    <count>378</count>
    #    <name>bug_13243</name>
    #  </pair>
    # </suppcounts>
    # Collect these and print them at the end.
    #
    # With our patch for https://bugs.kde.org/show_bug.cgi?id=205000 in,
    # the file also includes records of the form
    # <load_obj><obj>/usr/lib/libgcc_s.1.dylib</obj><ip>0x27000</ip></load_obj>
    # giving the filename and load address of each binary that was mapped
    # into the process.

    global TheAddressTable
    if self._use_gdb:
      TheAddressTable = gdb_helper.AddressTable()
    else:
      TheAddressTable = None
    cur_report_errors = set()
    suppcounts = defaultdict(int)
    badfiles = set()

    if self._analyze_start_time == None:
      self._analyze_start_time = time.time()
    start_time = self._analyze_start_time

    parse_failed = False
    for file in files:
      # Wait up to three minutes for valgrind to finish writing all files,
      # but after that, just skip incomplete files and warn.
      f = open(file, "r+")
      pid = re.match(r".*\.([0-9]+)$", file)
      if pid:
        pid = pid.groups()[0]
      found = False
      running = True
      firstrun = True
      skip = False
      origsize = os.path.getsize(file)
      while (running and not found and not skip and
             (firstrun or
              ((time.time() - start_time) < self.LOG_COMPLETION_TIMEOUT))):
        firstrun = False
        f.seek(0)
        if pid:
          # Make sure the process is still running so we don't wait for
          # 3 minutes if it was killed. See http://crbug.com/17453
          ps_out = subprocess.Popen("ps p %s" % pid, shell=True,
                                    stdout=subprocess.PIPE).stdout
          if len(ps_out.readlines()) < 2:
            running = False
        else:
          # No PID in the filename: don't poll, just do a single pass.
          skip = True
          running = False
        found = log_is_finished(f, False)
        if not running and not found:
          logging.warn("Valgrind process PID = %s is not running but its "
                       "XML log has not been finished correctly.\n"
                       "Make it up by adding some closing tags manually." % pid)
          found = log_is_finished(f, not running)
        if running and not found:
          time.sleep(1)
      f.close()
      if not found:
        badfiles.add(file)
      else:
        newsize = os.path.getsize(file)
        if origsize > newsize+1:
          logging.warn(str(origsize - newsize) +
                       " bytes of junk were after </valgrindoutput> in %s!" %
                       file)
        try:
          parsed_file = parse(file)
        except ExpatError as e:
          parse_failed = True
          logging.warn("could not parse %s: %s" % (file, e))
          # Dump a few lines of context around the failure to aid debugging.
          lineno = e.lineno - 1
          context_lines = 5
          context_start = max(0, lineno - context_lines)
          context_end = lineno + context_lines + 1
          context_file = open(file, "r")
          for i in range(0, context_start):
            context_file.readline()
          for i in range(context_start, context_end):
            context_data = context_file.readline().rstrip()
            if i != lineno:
              logging.warn("  %s" % context_data)
            else:
              logging.warn("> %s" % context_data)
          context_file.close()
          continue
        if TheAddressTable != None:
          load_objs = parsed_file.getElementsByTagName("load_obj")
          for load_obj in load_objs:
            obj = getTextOf(load_obj, "obj")
            ip = getTextOf(load_obj, "ip")
            TheAddressTable.AddBinaryAt(obj, ip)

        commandline = None
        preamble = parsed_file.getElementsByTagName("preamble")[0]
        for node in preamble.getElementsByTagName("line"):
          if node.localName == "line":
            for x in node.childNodes:
              if x.nodeType == node.TEXT_NODE and "Command" in x.data:
                commandline = x.data.split(": ")[1]

        raw_errors = parsed_file.getElementsByTagName("error")
        for raw_error in raw_errors:
          # Ignore "possible" leaks for now by default.
          if (self._show_all_leaks or
              getTextOf(raw_error, "kind") != "Leak_PossiblyLost"):
            error = ValgrindError(self._source_dir,
                                  raw_error, commandline, testcase)
            if error not in cur_report_errors:
              # We haven't seen such errors doing this report yet...
              if error in self._errors:
                # ... but we saw it in earlier reports, e.g. previous UI test
                cur_report_errors.add("This error was already printed in "
                                      "some other test, see 'hash=#%016X#'" % \
                                      error.ErrorHash())
              else:
                # ... and we haven't seen it in other tests as well
                self._errors.add(error)
                cur_report_errors.add(error)

        suppcountlist = parsed_file.getElementsByTagName("suppcounts")
        if len(suppcountlist) > 0:
          suppcountlist = suppcountlist[0]
          for node in suppcountlist.getElementsByTagName("pair"):
            count = getTextOf(node, "count")
            name = getTextOf(node, "name")
            suppcounts[name] += int(count)

    if len(badfiles) > 0:
      logging.warn("valgrind didn't finish writing %d files?!" % len(badfiles))
      for file in badfiles:
        logging.warn("Last 20 lines of %s :" % file)
        os.system("tail -n 20 '%s' 1>&2" % file)

    if parse_failed:
      logging.error("FAIL! Couldn't parse Valgrind output file")
      return -2

    common.PrintUsedSuppressionsList(suppcounts)

    retcode = 0
    if cur_report_errors:
      logging.error("FAIL! There were %s errors: " % len(cur_report_errors))

      if TheAddressTable != None:
        TheAddressTable.ResolveAll()

      for error in cur_report_errors:
        logging.error(error)

      retcode = -1

    # Report tool's insanity even if there were errors.
    if check_sanity:
      # Copy so we never mutate the class-level dict (a second Report() call
      # must see the full expectation set again).
      remaining_sanity_supp = dict(MemcheckAnalyzer.SANITY_TEST_SUPPRESSIONS)
      for (name, count) in suppcounts.items():
        # Workaround for http://crbug.com/334074
        if (name in remaining_sanity_supp and
            remaining_sanity_supp[name] <= count):
          del remaining_sanity_supp[name]
      if remaining_sanity_supp:
        logging.error("FAIL! Sanity check failed!")
        logging.info("The following test errors were not handled: ")
        for (name, count) in remaining_sanity_supp.items():
          logging.info("  * %dx %s" % (count, name))
        retcode = -3

    if retcode != 0:
      return retcode

    logging.info("PASS! No errors found!")
    return 0
def main():
  '''For testing only. The MemcheckAnalyzer class should be imported instead.'''
  parser = optparse.OptionParser("usage: %prog [options] <files to analyze>")
  parser.add_option("", "--source-dir",
                    help="path to top of source tree for this build"
                    "(used to normalize source paths in baseline)")

  (options, args) = parser.parse_args()
  if len(args) == 0:
    parser.error("no filename specified")
  filenames = args

  analyzer = MemcheckAnalyzer(options.source_dir, use_gdb=True)
  return analyzer.Report(filenames, None)
if __name__ == "__main__":
  # Propagate the analyzer's return code as the process exit status.
  sys.exit(main())