Roll src/third_party/WebKit 3aea697:d9c6159 (svn 201973:201974)
[chromium-blink-merge.git] / tools / findit / findit_for_crash.py
blob689294987ca0b1ccb2f4efbb3889c0e5c6fc78b8
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 import os
6 from threading import Lock
8 import blame
9 from common import utils
10 import component_dictionary
11 import crash_utils
12 import git_repository_parser
13 import match_set
14 import svn_repository_parser
17 LINE_CHANGE_PRIORITY = 1
18 FILE_CHANGE_PRIORITY = 2
19 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
20 CONFIG = crash_utils.ParseURLsFromConfig(os.path.join(_THIS_DIR,
21 'config.ini'))
24 def GenerateMatchEntry(
25 matches, revision_info, revision_number, file_path, function,
26 component_path, component_name, crashed_line_numbers, stack_frame_indices,
27 file_change_type, repository_parser):
28 """Generates a match object and adds it to the match set.
30 Args:
31 matches: A matchset object, a map from CL to a match object.
32 revision_info: The revision information, a map from fields (message,
33 changed files, etc) to its values.
34 revision_number: The SVN revision number or git hash.
35 file_path: The path of the file.
36 function: The function that caused an crash.
37 component_path: The path of the component this file is from.
38 component_name: The name of the component the file is from.
39 crashed_line_numbers: The list of the lines in the file that caused
40 the crash.
41 stack_frame_indices: The list of positions of this file within a stack.
42 file_change_type: Whether file is modified, added or deleted.
43 repository_parser: The parser object to parse line diff.
44 """
45 # Check if this CL should be ignored.
46 with matches.matches_lock:
47 if revision_number in matches.cls_to_ignore:
48 return
50 # If this CL is not already identified as suspected, create a new entry.
51 if revision_number not in matches.matches:
52 match = match_set.Match(revision_info, component_name)
53 message = revision_info['message']
54 # TODO(jeun): Don't hold lock while issuing http request.
55 match.ParseMessage(message, matches.codereview_api_url)
57 # If this match is a revert, add to the set of CLs to be ignored.
58 if match.is_revert:
59 matches.cls_to_ignore.add(revision_number)
61 # If this match has info on what CL it is reverted from, add that CL.
62 if match.revert_of:
63 matches.cls_to_ignore.add(match.revert_of)
65 return
67 matches.matches[revision_number] = match
69 else:
70 match = matches.matches[revision_number]
72 (diff_url, changed_line_numbers, changed_line_contents) = (
73 repository_parser.ParseLineDiff(
74 file_path, component_path, file_change_type, revision_number))
76 # Ignore this match if the component is not supported for svn.
77 if not diff_url:
78 return
80 # Find the intersection between the lines that this file crashed on and
81 # the changed lines.
82 (line_number_intersection, stack_frame_index_intersection, functions) = (
83 crash_utils.Intersection(
84 crashed_line_numbers, stack_frame_indices, changed_line_numbers,
85 function))
87 # Find the minimum distance between the changed lines and crashed lines.
88 (min_distance, min_crashed_line, min_changed_line) = \
89 crash_utils.FindMinLineDistance(crashed_line_numbers,
90 changed_line_numbers)
92 # Check whether this CL changes the crashed lines or not.
93 if line_number_intersection:
94 priority = LINE_CHANGE_PRIORITY
95 else:
96 priority = FILE_CHANGE_PRIORITY
98 # Add the parsed information to the object.
99 with matches.matches_lock:
100 match.crashed_line_numbers.append(line_number_intersection)
102 file_name = file_path.split('/')[-1]
103 match.changed_files.append(file_name)
105 # Update the min distance only if it is less than the current one.
106 if min_distance < match.min_distance:
107 match.min_distance = min_distance
108 match.min_distance_info = (file_name, min_crashed_line, min_changed_line)
110 # If this CL does not change the crashed line, all occurrence of this
111 # file in the stack has the same priority.
112 if not stack_frame_index_intersection:
113 stack_frame_index_intersection = stack_frame_indices
114 functions = function
115 match.stack_frame_indices.append(stack_frame_index_intersection)
116 match.changed_file_urls.append(diff_url)
117 match.priorities.append(priority)
118 match.function_list.append(functions)
121 def FindMatch(revisions_info_map, file_to_revision_info, file_to_crash_info,
122 component_path, component_name, repository_parser,
123 codereview_api_url):
124 """Finds a CL that modifies file in the stacktrace.
126 Args:
127 revisions_info_map: A dictionary mapping revision number to the CL
128 information.
129 file_to_revision_info: A dictionary mapping file to the revision that
130 modifies it.
131 file_to_crash_info: A dictionary mapping file to its occurrence in
132 stacktrace.
133 component_path: The path of the component to search for.
134 component_name: The name of the component to search for.
135 repository_parser: The parser object to parse the line diff.
136 codereview_api_url: A code review url to retrieve data from.
138 Returns:
139 Matches, a set of match objects.
141 matches = match_set.MatchSet(codereview_api_url)
143 tasks = []
144 # Iterate through the crashed files in the stacktrace.
145 for crashed_file_path in file_to_crash_info:
146 # Ignore header file.
147 if crashed_file_path.endswith('.h'):
148 continue
150 # If the file in the stacktrace is not changed in any commits, continue.
151 for changed_file_path in file_to_revision_info:
152 changed_file_name = changed_file_path.split('/')[-1].lower()
153 crashed_file_name = crashed_file_path.split('/')[-1].lower()
154 if changed_file_name != crashed_file_name:
155 continue
157 if not crash_utils.GuessIfSameSubPath(
158 changed_file_path.lower(), crashed_file_path.lower()):
159 continue
161 crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers(
162 crashed_file_path)
163 stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices(
164 crashed_file_path)
165 functions = file_to_crash_info.GetCrashFunctions(crashed_file_path)
167 # Iterate through the CLs that this file path is changed.
168 for (cl, file_change_type) in file_to_revision_info[changed_file_path]:
169 # If the file change is delete, ignore this CL.
170 if file_change_type == 'D':
171 continue
173 revision = revisions_info_map[cl]
175 tasks.append({
176 'function': GenerateMatchEntry,
177 'args':[matches, revision, cl, changed_file_path, functions,
178 component_path, component_name, crashed_line_numbers,
179 stack_frame_nums, file_change_type,
180 repository_parser]})
182 # Run all the tasks.
183 crash_utils.RunTasks(tasks)
185 matches.RemoveRevertedCLs()
187 return matches
190 def FindMatchForComponent(component_path, file_to_crash_info, changelog,
191 callstack_priority, results, results_lock):
192 """Parses changelog and finds suspected CLs for a given component.
194 Args:
195 component_path: The path of component to look for the culprit CL.
196 file_to_crash_info: A dictionary mapping file to its occurrence in
197 stackframe.
198 changelog: The parsed changelog for this component.
199 callstack_priority: The priority of this call stack, 0 if from crash stack,
200 1 if from freed, 2 if from previously allocated.
201 results: A dictionary to store the result.
202 results_lock: A lock that guards results.
204 (repository_parser, component_name, revisions, file_to_revision_map) = \
205 changelog
207 # Find match for this component.
208 codereview_api_url = CONFIG['codereview']['review_url']
209 component_result = FindMatch(
210 revisions, file_to_revision_map, file_to_crash_info, component_path,
211 component_name, repository_parser, codereview_api_url)
212 matches = component_result.matches
214 # For all the match results in a dictionary, add to the list so that it
215 # can be sorted.
216 with results_lock:
217 for cl in matches:
218 match = matches[cl]
219 results.append((callstack_priority, cl, match))
222 def FindMatchForCallstack(
223 callstack, components, component_to_changelog_map, results,
224 results_lock):
225 """Finds culprit cl for a stack within a stacktrace.
227 For each components to look for, create new thread that computes the matches
228 and join the results at the end.
230 Args:
231 callstack: A callstack in a stacktrace to find the result for.
232 components: A set of components to look for.
233 component_to_changelog_map: A map from component to its parsed changelog.
234 results: A list to aggregrate results from all stacktraces.
235 results_lock: A lock that guards results.
237 # Create component dictionary from the component and call stack.
238 component_dict = component_dictionary.ComponentDictionary(callstack,
239 components)
240 callstack_priority = callstack.priority
242 # Iterate through all components.
243 for component_path in component_dict:
244 # If the component to consider in this callstack is not in the parsed list
245 # of components, ignore this one.
246 if component_path not in component_to_changelog_map:
247 continue
249 changelog = component_to_changelog_map[component_path]
250 file_to_crash_info = component_dict.GetFileDict(component_path)
251 FindMatchForComponent(component_path, file_to_crash_info, changelog,
252 callstack_priority, results, results_lock)
255 def FindMatchForStacktrace(stacktrace, components,
256 component_to_regression_dict):
257 """Finds the culprit CL for stacktrace.
259 The passed stacktrace is either from release build stacktrace
260 or debug build stacktrace.
262 Args:
263 stacktrace: A list of parsed stacks within a stacktrace.
264 components: A set of components to look for.
265 component_to_regression_dict: A dictionary mapping component path to
266 its regression.
268 Returns:
269 A list of match results from all stacks.
271 # A list to aggregate results from all the callstacks in the stacktrace.
272 results = []
273 results_lock = Lock()
275 # Setup parsers.
276 svn_parser = svn_repository_parser.SVNParser(CONFIG['svn'])
277 git_parser = git_repository_parser.GitParser(component_to_regression_dict,
278 CONFIG['git'])
280 # Create a cache of parsed revisions.
281 component_to_changelog_map = {}
282 for component_path in components:
283 component_object = component_to_regression_dict[component_path]
284 range_start = component_object['old_revision']
285 range_end = component_object['new_revision']
287 # If range start is 0, the range is too large and the crash has been
288 # introduced the archived build, so ignore this case.
289 if range_start == '0':
290 continue
292 component_name = component_to_regression_dict[component_path]['name']
294 is_git = utils.IsGitHash(range_start)
295 if is_git:
296 repository_parser = git_parser
297 else:
298 repository_parser = svn_parser
300 (revisions, file_to_revision_map) = repository_parser.ParseChangelog(
301 component_path, range_start, range_end)
303 # If the returned map from ParseChangeLog is empty, we don't need to look
304 # further because either the parsing failed or the changelog is empty.
305 if not (revisions and file_to_revision_map):
306 continue
308 component_to_changelog_map[component_path] = (repository_parser,
309 component_name,
310 revisions,
311 file_to_revision_map)
313 # Analyze each of the call stacks in the stacktrace.
314 for callstack in stacktrace.stack_list:
315 FindMatchForCallstack(callstack, components, component_to_changelog_map,
316 results, results_lock)
318 return results
321 def SortMatchesFunction(match_with_stack_priority):
322 """A function to sort the match triple.
324 Currently, it sorts the list by:
325 1) The highest priority file change in the CL (changing crashed line is
326 higher priority than just changing the file).
327 2) The callstack this match is computed (crash stack, freed, allocation).
328 3) The minimum stack frame number of the changed file in the match.
329 4) The number of files this CL changes (higher the better).
330 5) The minimum distance between the lines that the CL changes and crashed
331 lines.
333 Args:
334 match_with_stack_priority: A match object, with the CL it is from and what
335 callstack it is from.
337 Returns:
338 A sort key.
340 (stack_priority, _, match) = match_with_stack_priority
342 return (min(match.priorities),
343 stack_priority,
344 match.min_distance,
345 crash_utils.FindMinStackFrameNumber(match.stack_frame_indices,
346 match.priorities),
347 -len(match.changed_files))
350 def SortAndFilterMatches(matches, num_important_frames=5):
351 """Filters the list of potential culprit CLs to remove noise.
353 Args:
354 matches: A list containing match results.
355 num_important_frames: A number of frames on the top of the frame to Check
356 for when filtering the results. A match with a file
357 that is in top num_important_frames of the stacktrace
358 is regarded more probable then others.
360 Returns:
361 Filtered match results.
363 new_matches = []
364 line_changed = False
365 is_important_frame = False
366 highest_priority_stack = crash_utils.INFINITY
367 matches.sort(key=SortMatchesFunction)
368 # Iterate through the matches to find out what results are significant.
369 for stack_priority, cl, match in matches:
370 # Check if the current match changes crashed line.
371 is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY)
373 # Check which stack this match is from, and finds the highest priority
374 # callstack up to this point.
375 current_stack = stack_priority
376 if current_stack < highest_priority_stack:
377 highest_priority_stack = current_stack
379 # Check if current match changes a file that occurs in crash state.
380 flattened_stack_frame_indices = [frame for frame_indices in
381 match.stack_frame_indices
382 for frame in frame_indices]
383 current_is_important = (
384 min(flattened_stack_frame_indices) < num_important_frames)
386 # This match and anything lower than this should be ignored if:
387 # - Current match does not change crashed lines but there are matches
388 # that do so.
389 # - Current match is not in crash state but there are matches in it.
390 # - There are other matches that came from higher priority stack.
391 if (line_changed and not is_line_change) or (
392 is_important_frame and not current_is_important) or (
393 current_stack > highest_priority_stack):
394 break
396 # Update the variables.
397 if is_line_change:
398 line_changed = True
399 if current_is_important:
400 is_important_frame = True
402 # Add current match to the filtered result.
403 new_matches.append((stack_priority, cl, match))
405 return new_matches
408 def GenerateReasonForMatches(matches):
409 """Generates a reason that a match (CL) is a culprit cl.
411 Args:
412 matches: A list of match objects.
414 # Iterate through the matches in the list.
415 for i, _, match in matches:
416 reason = []
418 # Zip the files in the match by the reason they are suspected
419 # (how the file is modified).
420 match_by_priority = zip(
421 match.priorities, match.crashed_line_numbers, match.changed_files,
422 match.stack_frame_indices, match.function_list)
424 # Sort the zipped changed files in the match by their priority so that the
425 # changed lines comes first in the reason.
426 match_by_priority.sort(
427 key=lambda (priority, crashed_line_numbers, file_name,
428 stack_frame_indices, function_list): priority)
430 # Iterate through the sorted match.
431 for i in range(len(match_by_priority)):
432 (priority, crashed_line_numbers, file_name, stack_frame_indices,
433 function_list) = match_by_priority[i]
435 # If the file in the match is a line change, append a explanation.
436 if priority == LINE_CHANGE_PRIORITY:
437 crashed_line_numbers = [crashed_line_number
438 for lines in crashed_line_numbers
439 for crashed_line_number in lines]
440 reason.append(
441 'Lines %s of file %s which potentially caused crash '
442 'are changed in this cl (%s).\n' %
443 (utils.JoinLineNumbers(crashed_line_numbers, accepted_gap=4),
444 file_name,
445 crash_utils.PrettifyFrameInfo(stack_frame_indices, function_list)))
447 else:
448 # Get all the files that are not line change.
449 rest_of_the_files = match_by_priority[i:]
451 if len(rest_of_the_files) == 1:
452 file_string = 'File %s is changed in this cl '
453 else:
454 file_string = 'Files %s are changed in this cl '
456 # Create a list of file names, and prettify the list.
457 file_names = [
458 file_name for (_, _, file_name, _, _) in rest_of_the_files]
459 pretty_file_names = crash_utils.PrettifyList(file_names)
461 # Add the reason, break because we took care of the rest of the files.
462 file_string += ('(and is part of stack %s)' %
463 crash_utils.PrettifyFrameInfo(stack_frame_indices, function_list))
464 reason.append(file_string % pretty_file_names)
465 break
467 # Set the reason as string.
468 match.reason = '\n'.join(reason)
471 def CombineMatches(matches):
472 """Combine possible duplicates in matches.
474 Args:
475 matches: A list of matches object, along with its callstack priority and
476 CL it is from.
477 Returns:
478 A combined list of matches.
480 combined_matches = []
482 for stack_index, cl, match in matches:
483 found_match = None
485 # Iterate through the list of combined matches.
486 for _, cl_combined, match_combined in combined_matches:
487 # Check for if current CL is already higher up in the result.
488 if cl == cl_combined:
489 found_match = match_combined
490 break
492 # If current match is not already in, add it to the list of matches.
493 if not found_match:
494 combined_matches.append((stack_index, cl, match))
495 continue
497 # Combine the reason if the current match is already in there.
498 found_match.reason += '\n' + match.reason
499 if match.min_distance < found_match.min_distance:
500 found_match.min_distance = match.min_distance
501 found_match.min_distance_info = match.min_distance_info
503 for stack_index, cl, match in combined_matches:
504 if match.min_distance_info:
505 file_name, min_crashed_line, min_changed_line = match.min_distance_info
506 match.reason = match.reason.strip()
507 match.reason += (
508 '\nMinimum distance from crash line to modified line: %d. '
509 '(file: %s, crashed on: %d, modified: %d).' %
510 (match.min_distance, file_name, min_crashed_line, min_changed_line))
512 return combined_matches
515 def FilterAndGenerateReasonForMatches(result):
516 """A wrapper function.
518 It generates reasons for the matches and returns string representation
519 of filtered results.
521 Args:
522 result: A list of match objects.
524 Returns:
525 A string representation of filtered results.
527 new_result = SortAndFilterMatches(result)
528 GenerateReasonForMatches(new_result)
529 combined_matches = CombineMatches(new_result)
530 return crash_utils.MatchListToResultList(combined_matches)
533 def ParseCrashComponents(main_stack):
534 """Parses the crashing component.
536 Crashing components is a component that top_n_frames of the stacktrace is
537 from.
539 Args:
540 main_stack: Main stack from the stacktrace.
542 Returns:
543 A set of components.
545 components = set()
547 for frame in main_stack.frame_list:
548 components.add(frame.component_path)
550 return components
553 def GenerateAndFilterBlameList(callstack, component_to_crash_revision_dict,
554 component_to_regression_dict):
555 """A wrapper function.
557 Finds blame information for stack and returns string representation.
559 Args:
560 callstack: A callstack to find the blame information.
561 component_to_crash_revision_dict: A dictionary mapping component to its
562 crash revision.
563 component_to_regression_dict: A dictionary mapping component to its
564 regression.
566 Returns:
567 A list of blame results.
569 if component_to_regression_dict:
570 parsed_deps = component_to_regression_dict
571 else:
572 parsed_deps = component_to_crash_revision_dict
574 # Setup parser objects to use for parsing blame information.
575 svn_parser = svn_repository_parser.SVNParser(CONFIG['svn'])
576 git_parser = git_repository_parser.GitParser(parsed_deps, CONFIG['git'])
577 parsers = {}
578 parsers['svn'] = svn_parser
579 parsers['git'] = git_parser
581 # Create and generate the blame objects from the callstack.
582 blame_list = blame.BlameList()
583 blame_list.FindBlame(callstack, component_to_crash_revision_dict,
584 component_to_regression_dict,
585 parsers)
587 blame_list.FilterAndSortBlameList()
588 return crash_utils.BlameListToResultList(blame_list)
591 def FindItForCrash(stacktrace_list,
592 callstack,
593 component_to_regression_dict,
594 component_to_crash_revision_dict):
595 """Finds the culprit CL from the list of stacktrace.
597 Args:
598 stacktrace_list: A list of stacktraces to look for, in the order of
599 decreasing significance.
600 callstack: A callstack object to show blame information for, if there are
601 no results for all stacktraces in the stacktrace_list.
602 component_to_regression_dict: A parsed regression information as a
603 result of parsing DEPS file.
604 component_to_crash_revision_dict: A parsed crash revision information.
606 Returns:
607 A list of result objects, with the message how the result is created.
609 # If regression information is not available, return blame information.
610 if not component_to_regression_dict:
611 result = GenerateAndFilterBlameList(callstack,
612 component_to_crash_revision_dict,
613 component_to_regression_dict)
614 if result:
615 return_message = (
616 'Regression information is not available. The result is '
617 'the blame information.')
618 else:
619 return_message = ('Findit could not find any suspected CLs.')
621 return (return_message, result)
623 for stacktrace in stacktrace_list:
624 # Check the next stacktrace if current one is empty.
625 if not stacktrace.stack_list:
626 continue
628 # Get the crash stack for this stacktrace, and extract crashing components
629 # from it.
630 main_stack = stacktrace.GetCrashStack()
631 components = ParseCrashComponents(main_stack)
633 result_for_stacktrace = FindMatchForStacktrace(
634 stacktrace, components, component_to_regression_dict)
635 filtered_result = FilterAndGenerateReasonForMatches(result_for_stacktrace)
637 # If the result is empty, check the next stacktrace. Else, return the
638 # filtered result.
639 if not filtered_result:
640 continue
642 return_message = (
643 'The result is a list of CLs that change the crashed files.')
644 return (return_message, filtered_result)
646 # If no match is found, return the blame information for the input
647 # callstack.
648 result = GenerateAndFilterBlameList(
649 callstack, component_to_crash_revision_dict,
650 component_to_regression_dict)
652 if result:
653 return_message = (
654 'No CL in the regression range changes the crashed files. '
655 'The result is the blame information.')
657 # When findit could not find any CL that changes file in stacktrace or if
658 # if cannot get any blame information, return a message saying that no
659 # results are available.
660 else:
661 return_message = ('Findit could not find any suspected CLs.')
663 return (return_message, result)