2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
8 ''' Given a valgrind XML file, parses errors and uniques them.'''
12 from collections
import defaultdict
21 from xml
.dom
.minidom
import parse
22 from xml
.parsers
.expat
import ExpatError
26 # Global symbol table (yuck)
27 TheAddressTable
= None
29 # These are regexps that define functions (using C++ mangled names)
30 # we don't want to see in stack traces while pretty printing
31 # or generating suppressions.
32 # Just stop printing the stack/suppression frames when the current one
33 # matches any of these.
34 _BORING_CALLERS
= common
.BoringCallers(mangled
=True, use_re_wildcards
=True)
36 def getTextOf(top_node
, name
):
37 ''' Returns all text in all DOM nodes with a certain |name| that are children
42 for nodes_named
in top_node
.getElementsByTagName(name
):
43 text
+= "".join([node
.data
for node
in nodes_named
.childNodes
44 if node
.nodeType
== node
.TEXT_NODE
])
47 def getCDATAOf(top_node
, name
):
48 ''' Returns all CDATA in all DOM nodes with a certain |name| that are children
53 for nodes_named
in top_node
.getElementsByTagName(name
):
54 text
+= "".join([node
.data
for node
in nodes_named
.childNodes
55 if node
.nodeType
== node
.CDATA_SECTION_NODE
])
60 def shortenFilePath(source_dir
, directory
):
61 '''Returns a string with the string prefix |source_dir| removed from
63 prefixes_to_cut
= ["build/src/", "valgrind/coregrind/", "out/Release/../../"]
66 prefixes_to_cut
.append(source_dir
)
68 for p
in prefixes_to_cut
:
69 index
= directory
.rfind(p
)
71 directory
= directory
[index
+ len(p
):]
75 # Constants that give real names to the abbreviations in valgrind XML output.
76 INSTRUCTION_POINTER
= "ip"
80 SRC_FILE_NAME
= "file"
83 def gatherFrames(node
, source_dir
):
85 for frame
in node
.getElementsByTagName("frame"):
87 INSTRUCTION_POINTER
: getTextOf(frame
, INSTRUCTION_POINTER
),
88 OBJECT_FILE
: getTextOf(frame
, OBJECT_FILE
),
89 FUNCTION_NAME
: getTextOf(frame
, FUNCTION_NAME
),
90 SRC_FILE_DIR
: shortenFilePath(
91 source_dir
, getTextOf(frame
, SRC_FILE_DIR
)),
92 SRC_FILE_NAME
: getTextOf(frame
, SRC_FILE_NAME
),
93 SRC_LINE
: getTextOf(frame
, SRC_LINE
)
96 # Ignore this frame and all the following if it's a "boring" function.
98 for regexp
in _BORING_CALLERS
:
99 if re
.match("^%s$" % regexp
, frame_dict
[FUNCTION_NAME
]):
105 frames
+= [frame_dict
]
107 global TheAddressTable
108 if TheAddressTable
!= None and frame_dict
[SRC_LINE
] == "":
110 TheAddressTable
.Add(frame_dict
[OBJECT_FILE
],
111 frame_dict
[INSTRUCTION_POINTER
])
115 ''' Takes a <DOM Element: error> node and reads all the data from it. A
116 ValgrindError is immutable and is hashed on its pretty printed output.
119 def __init__(self
, source_dir
, error_node
, commandline
, testcase
):
120 ''' Copies all the relevant information out of the DOM and into object
124 error_node: The <error></error> DOM node we're extracting from.
125 source_dir: Prefix that should be stripped from the <dir> node.
126 commandline: The command that was run under valgrind
127 testcase: The test case name, if known.
130 # Valgrind errors contain one <what><stack> pair, plus an optional
131 # <auxwhat><stack> pair, plus an optional <origin><what><stack></origin>,
132 # plus (since 3.5.0) a <suppression></suppression> pair.
133 # (Origin is nicely enclosed; too bad the other two aren't.)
134 # The most common way to see all three in one report is
135 # a syscall with a parameter that points to uninitialized memory, e.g.
138 # <unique>0x6d</unique>
140 # <kind>SyscallParam</kind>
141 # <what>Syscall param write(buf) points to uninitialised byte(s)</what>
147 # <auxwhat>Address 0x5c9af4f is 7 bytes inside a block of ...</auxwhat>
154 # <what>Uninitialised value was created by a heap allocation</what>
162 # <sname>insert_a_suppression_name_here</sname>
163 # <skind>Memcheck:Param</skind>
164 # <skaux>write(buf)</skaux>
165 # <sframe> <fun>__write_nocancel</fun> </sframe>
167 # <sframe> <fun>main</fun> </sframe>
171 # <insert_a_suppression_name_here>
174 # fun:__write_nocancel
183 # Each frame looks like this:
186 # <obj>/data/dkegel/chrome-build/src/out/Release/base_unittests</obj>
187 # <fn>_ZN7testing8internal12TestInfoImpl7RunTestEPNS_8TestInfoE</fn>
188 # <dir>/data/dkegel/chrome-build/src/testing/gtest/src</dir>
189 # <file>gtest-internal-inl.h</file>
192 # although the dir, file, and line elements are missing if there is
195 self
._kind
= getTextOf(error_node
, "kind")
196 self
._backtraces
= []
197 self
._suppression
= None
198 self
._commandline
= commandline
199 self
._testcase
= testcase
200 self
._additional
= []
202 # Iterate through the nodes, parsing <what|auxwhat><stack> pairs.
204 for node
in error_node
.childNodes
:
205 if node
.localName
== "what" or node
.localName
== "auxwhat":
206 description
= "".join([n
.data
for n
in node
.childNodes
207 if n
.nodeType
== n
.TEXT_NODE
])
208 elif node
.localName
== "xwhat":
209 description
= getTextOf(node
, "text")
210 elif node
.localName
== "stack":
212 self
._backtraces
.append([description
, gatherFrames(node
, source_dir
)])
214 elif node
.localName
== "origin":
215 description
= getTextOf(node
, "what")
216 stack
= node
.getElementsByTagName("stack")[0]
217 frames
= gatherFrames(stack
, source_dir
)
218 self
._backtraces
.append([description
, frames
])
222 elif description
and node
.localName
!= None:
223 # The lastest description has no stack, e.g. "Address 0x28 is unknown"
224 self
._additional
.append(description
)
227 if node
.localName
== "suppression":
228 self
._suppression
= getCDATAOf(node
, "rawtext");
231 ''' Pretty print the type and backtrace(s) of this specific error,
232 including suppression (which is just a mangled backtrace).'''
234 if (self
._commandline
):
235 output
+= self
._commandline
+ "\n"
237 output
+= self
._kind
+ "\n"
238 for backtrace
in self
._backtraces
:
239 output
+= backtrace
[0] + "\n"
240 filter = subprocess
.Popen("c++filt -n", stdin
=subprocess
.PIPE
,
241 stdout
=subprocess
.PIPE
,
242 stderr
=subprocess
.STDOUT
,
246 for frame
in backtrace
[1]:
247 buf
+= (frame
[FUNCTION_NAME
] or frame
[INSTRUCTION_POINTER
]) + "\n"
248 (stdoutbuf
, stderrbuf
) = filter.communicate(buf
.encode('latin-1'))
249 demangled_names
= stdoutbuf
.split("\n")
252 for frame
in backtrace
[1]:
253 output
+= (" " + demangled_names
[i
])
256 global TheAddressTable
257 if TheAddressTable
!= None and frame
[SRC_FILE_DIR
] == "":
259 foo
= TheAddressTable
.GetFileLine(frame
[OBJECT_FILE
],
260 frame
[INSTRUCTION_POINTER
])
262 output
+= (" (" + foo
[0] + ":" + foo
[1] + ")")
263 elif frame
[SRC_FILE_DIR
] != "":
264 output
+= (" (" + frame
[SRC_FILE_DIR
] + "/" + frame
[SRC_FILE_NAME
] +
265 ":" + frame
[SRC_LINE
] + ")")
267 output
+= " (" + frame
[OBJECT_FILE
] + ")"
270 for additional
in self
._additional
:
271 output
+= additional
+ "\n"
273 assert self
._suppression
!= None, "Your Valgrind doesn't generate " \
274 "suppressions - is it too old?"
277 output
+= "The report came from the `%s` test.\n" % self
._testcase
278 output
+= "Suppression (error hash=#%016X#):\n" % self
.ErrorHash()
279 output
+= (" For more info on using suppressions see "
280 "http://dev.chromium.org/developers/tree-sheriffs/sheriff-details-chromium/memory-sheriff#TOC-Suppressing-memory-reports")
282 # Widen suppression slightly to make portable between mac and linux
283 # TODO(timurrrr): Oops, these transformations should happen
284 # BEFORE calculating the hash!
285 supp
= self
._suppression
;
286 supp
= supp
.replace("fun:_Znwj", "fun:_Znw*")
287 supp
= supp
.replace("fun:_Znwm", "fun:_Znw*")
288 supp
= supp
.replace("fun:_Znaj", "fun:_Zna*")
289 supp
= supp
.replace("fun:_Znam", "fun:_Zna*")
291 # Make suppressions even less platform-dependent.
292 for sz
in [1, 2, 4, 8]:
293 supp
= supp
.replace("Memcheck:Addr%d" % sz
, "Memcheck:Unaddressable")
294 supp
= supp
.replace("Memcheck:Value%d" % sz
, "Memcheck:Uninitialized")
295 supp
= supp
.replace("Memcheck:Cond", "Memcheck:Uninitialized")
297 # Split into lines so we can enforce length limits
298 supplines
= supp
.split("\n")
299 supp
= None # to avoid re-use
301 # Truncate at line 26 (VG_MAX_SUPP_CALLERS plus 2 for name and type)
302 # or at the first 'boring' caller.
303 # (https://bugs.kde.org/show_bug.cgi?id=199468 proposes raising
304 # VG_MAX_SUPP_CALLERS, but we're probably fine with it as is.)
305 newlen
= min(26, len(supplines
));
307 # Drop boring frames and all the following.
308 enough_frames
= False
309 for frameno
in range(newlen
):
310 for boring_caller
in _BORING_CALLERS
:
311 if re
.match("^ +fun:%s$" % boring_caller
, supplines
[frameno
]):
317 if (len(supplines
) > newlen
):
318 supplines
= supplines
[0:newlen
]
319 supplines
.append("}")
321 for frame
in range(len(supplines
)):
322 # Replace the always-changing anonymous namespace prefix with "*".
323 m
= re
.match("( +fun:)_ZN.*_GLOBAL__N_.*\.cc_" +
324 "[0-9a-fA-F]{8}_[0-9a-fA-F]{8}(.*)",
327 supplines
[frame
] = "*".join(m
.groups())
329 output
+= "\n".join(supplines
) + "\n"
333 def UniqueString(self
):
334 ''' String to use for object identity. Don't print this, use str(obj)
336 rep
= self
._kind
+ " "
337 for backtrace
in self
._backtraces
:
338 for frame
in backtrace
[1]:
339 rep
+= frame
[FUNCTION_NAME
]
341 if frame
[SRC_FILE_DIR
] != "":
342 rep
+= frame
[SRC_FILE_DIR
] + "/" + frame
[SRC_FILE_NAME
]
344 rep
+= frame
[OBJECT_FILE
]
348 # This is a device-independent hash identifying the suppression.
349 # By printing out this hash we can find duplicate reports between tests and
350 # different shards running on multiple buildbots
352 return int(hashlib
.md5(self
.UniqueString()).hexdigest()[:16], 16)
355 return hash(self
.UniqueString())
356 def __eq__(self
, rhs
):
357 return self
.UniqueString() == rhs
359 def log_is_finished(f
, force_finish
):
367 # Okay, the log is not finished but we can make it up to be parseable:
368 if prev_line
.strip() in ["</error>", "</errorcounts>", "</status>"]:
369 f
.write("</valgrindoutput>\n")
372 if '</valgrindoutput>' in line
:
373 # Valgrind often has garbage after </valgrindoutput> upon crash.
378 class MemcheckAnalyzer
:
379 ''' Given a set of Valgrind XML files, parse all the errors out of them,
380 unique them and output the results.'''
382 SANITY_TEST_SUPPRESSIONS
= {
383 "Memcheck sanity test 01 (memory leak).": 1,
384 "Memcheck sanity test 02 (malloc/read left).": 1,
385 "Memcheck sanity test 03 (malloc/read right).": 1,
386 "Memcheck sanity test 04 (malloc/write left).": 1,
387 "Memcheck sanity test 05 (malloc/write right).": 1,
388 "Memcheck sanity test 06 (new/read left).": 1,
389 "Memcheck sanity test 07 (new/read right).": 1,
390 "Memcheck sanity test 08 (new/write left).": 1,
391 "Memcheck sanity test 09 (new/write right).": 1,
392 "Memcheck sanity test 10 (write after free).": 1,
393 "Memcheck sanity test 11 (write after delete).": 1,
394 "Memcheck sanity test 12 (array deleted without []).": 1,
395 "Memcheck sanity test 13 (single element deleted with []).": 1,
396 "Memcheck sanity test 14 (malloc/read uninit).": 1,
397 "Memcheck sanity test 15 (new/read uninit).": 1,
400 # Max time to wait for memcheck logs to complete.
401 LOG_COMPLETION_TIMEOUT
= 180.0
403 def __init__(self
, source_dir
, show_all_leaks
=False, use_gdb
=False):
404 '''Create a parser for Memcheck logs.
407 source_dir: Path to top of source tree for this build
408 show_all_leaks: Whether to show even less important leaks
409 use_gdb: Whether to use gdb to resolve source filenames and line numbers
410 in the report stacktraces
412 self
._source
_dir
= source_dir
413 self
._show
_all
_leaks
= show_all_leaks
414 self
._use
_gdb
= use_gdb
416 # Contains the set of unique errors
419 # Contains the time when the we started analyzing the first log file.
420 # This variable is used to skip incomplete logs after some timeout.
421 self
._analyze
_start
_time
= None
424 def Report(self
, files
, testcase
, check_sanity
=False):
425 '''Reads in a set of files and prints Memcheck report.
428 files: A list of filenames.
429 check_sanity: if true, search for SANITY_TEST_SUPPRESSIONS
431 # Beyond the detailed errors parsed by ValgrindError above,
432 # the xml file contain records describing suppressions that were used:
436 # <name>pango_font_leak_todo</name>
440 # <name>bug_13243</name>
443 # Collect these and print them at the end.
445 # With our patch for https://bugs.kde.org/show_bug.cgi?id=205000 in,
446 # the file also includes records of the form
447 # <load_obj><obj>/usr/lib/libgcc_s.1.dylib</obj><ip>0x27000</ip></load_obj>
448 # giving the filename and load address of each binary that was mapped
451 global TheAddressTable
453 TheAddressTable
= gdb_helper
.AddressTable()
455 TheAddressTable
= None
456 cur_report_errors
= set()
457 suppcounts
= defaultdict(int)
460 if self
._analyze
_start
_time
== None:
461 self
._analyze
_start
_time
= time
.time()
462 start_time
= self
._analyze
_start
_time
466 # Wait up to three minutes for valgrind to finish writing all files,
467 # but after that, just skip incomplete files and warn.
469 pid
= re
.match(".*\.([0-9]+)$", file)
471 pid
= pid
.groups()[0]
476 origsize
= os
.path
.getsize(file)
477 while (running
and not found
and not skip
and
479 ((time
.time() - start_time
) < self
.LOG_COMPLETION_TIMEOUT
))):
483 # Make sure the process is still running so we don't wait for
484 # 3 minutes if it was killed. See http://crbug.com/17453
485 ps_out
= subprocess
.Popen("ps p %s" % pid
, shell
=True,
486 stdout
=subprocess
.PIPE
).stdout
487 if len(ps_out
.readlines()) < 2:
492 found
= log_is_finished(f
, False)
493 if not running
and not found
:
494 logging
.warn("Valgrind process PID = %s is not running but its "
495 "XML log has not been finished correctly.\n"
496 "Make it up by adding some closing tags manually." % pid
)
497 found
= log_is_finished(f
, not running
)
498 if running
and not found
:
504 newsize
= os
.path
.getsize(file)
505 if origsize
> newsize
+1:
506 logging
.warn(str(origsize
- newsize
) +
507 " bytes of junk were after </valgrindoutput> in %s!" %
510 parsed_file
= parse(file);
511 except ExpatError
, e
:
513 logging
.warn("could not parse %s: %s" % (file, e
))
514 lineno
= e
.lineno
- 1
516 context_start
= max(0, lineno
- context_lines
)
517 context_end
= lineno
+ context_lines
+ 1
518 context_file
= open(file, "r")
519 for i
in range(0, context_start
):
520 context_file
.readline()
521 for i
in range(context_start
, context_end
):
522 context_data
= context_file
.readline().rstrip()
524 logging
.warn(" %s" % context_data
)
526 logging
.warn("> %s" % context_data
)
529 if TheAddressTable
!= None:
530 load_objs
= parsed_file
.getElementsByTagName("load_obj")
531 for load_obj
in load_objs
:
532 obj
= getTextOf(load_obj
, "obj")
533 ip
= getTextOf(load_obj
, "ip")
534 TheAddressTable
.AddBinaryAt(obj
, ip
)
537 preamble
= parsed_file
.getElementsByTagName("preamble")[0];
538 for node
in preamble
.getElementsByTagName("line"):
539 if node
.localName
== "line":
540 for x
in node
.childNodes
:
541 if x
.nodeType
== node
.TEXT_NODE
and "Command" in x
.data
:
545 raw_errors
= parsed_file
.getElementsByTagName("error")
546 for raw_error
in raw_errors
:
547 # Ignore "possible" leaks for now by default.
548 if (self
._show
_all
_leaks
or
549 getTextOf(raw_error
, "kind") != "Leak_PossiblyLost"):
550 error
= ValgrindError(self
._source
_dir
,
551 raw_error
, commandline
, testcase
)
552 if error
not in cur_report_errors
:
553 # We haven't seen such errors doing this report yet...
554 if error
in self
._errors
:
555 # ... but we saw it in earlier reports, e.g. previous UI test
556 cur_report_errors
.add("This error was already printed in "
557 "some other test, see 'hash=#%016X#'" % \
560 # ... and we haven't seen it in other tests as well
561 self
._errors
.add(error
)
562 cur_report_errors
.add(error
)
564 suppcountlist
= parsed_file
.getElementsByTagName("suppcounts")
565 if len(suppcountlist
) > 0:
566 suppcountlist
= suppcountlist
[0]
567 for node
in suppcountlist
.getElementsByTagName("pair"):
568 count
= getTextOf(node
, "count");
569 name
= getTextOf(node
, "name");
570 suppcounts
[name
] += int(count
)
572 if len(badfiles
) > 0:
573 logging
.warn("valgrind didn't finish writing %d files?!" % len(badfiles
))
574 for file in badfiles
:
575 logging
.warn("Last 20 lines of %s :" % file)
576 os
.system("tail -n 20 '%s' 1>&2" % file)
579 logging
.error("FAIL! Couldn't parse Valgrind output file")
582 common
.PrintUsedSuppressionsList(suppcounts
)
585 if cur_report_errors
:
586 logging
.error("FAIL! There were %s errors: " % len(cur_report_errors
))
588 if TheAddressTable
!= None:
589 TheAddressTable
.ResolveAll()
591 for error
in cur_report_errors
:
596 # Report tool's insanity even if there were errors.
598 remaining_sanity_supp
= MemcheckAnalyzer
.SANITY_TEST_SUPPRESSIONS
599 for (name
, count
) in suppcounts
.iteritems():
600 # Workaround for http://crbug.com/334074
601 if (name
in remaining_sanity_supp
and
602 remaining_sanity_supp
[name
] <= count
):
603 del remaining_sanity_supp
[name
]
604 if remaining_sanity_supp
:
605 logging
.error("FAIL! Sanity check failed!")
606 logging
.info("The following test errors were not handled: ")
607 for (name
, count
) in remaining_sanity_supp
.iteritems():
608 logging
.info(" * %dx %s" % (count
, name
))
614 logging
.info("PASS! No errors found!")
619 '''For testing only. The MemcheckAnalyzer class should be imported instead.'''
620 parser
= optparse
.OptionParser("usage: %prog [options] <files to analyze>")
621 parser
.add_option("", "--source-dir",
622 help="path to top of source tree for this build"
623 "(used to normalize source paths in baseline)")
625 (options
, args
) = parser
.parse_args()
627 parser
.error("no filename specified")
630 analyzer
= MemcheckAnalyzer(options
.source_dir
, use_gdb
=True)
631 return analyzer
.Report(filenames
, None)
634 if __name__
== "__main__":