1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
6 from threading
import Lock
9 from common
import utils
10 import component_dictionary
12 import git_repository_parser
14 import svn_repository_parser
17 LINE_CHANGE_PRIORITY
= 1
18 FILE_CHANGE_PRIORITY
= 2
19 _THIS_DIR
= os
.path
.abspath(os
.path
.dirname(__file__
))
20 CONFIG
= crash_utils
.ParseURLsFromConfig(os
.path
.join(_THIS_DIR
,
24 def GenerateMatchEntry(
25 matches
, revision_info
, revision_number
, file_path
, function
,
26 component_path
, component_name
, crashed_line_numbers
, stack_frame_indices
,
27 file_change_type
, repository_parser
):
28 """Generates a match object and adds it to the match set.
31 matches: A matchset object, a map from CL to a match object.
32 revision_info: The revision information, a map from fields (message,
33 changed files, etc) to its values.
34 revision_number: The SVN revision number or git hash.
35 file_path: The path of the file.
36 function: The function that caused an crash.
37 component_path: The path of the component this file is from.
38 component_name: The name of the component the file is from.
39 crashed_line_numbers: The list of the lines in the file that caused
41 stack_frame_indices: The list of positions of this file within a stack.
42 file_change_type: Whether file is modified, added or deleted.
43 repository_parser: The parser object to parse line diff.
45 # Check if this CL should be ignored.
46 with matches
.matches_lock
:
47 if revision_number
in matches
.cls_to_ignore
:
50 # If this CL is not already identified as suspected, create a new entry.
51 if revision_number
not in matches
.matches
:
52 match
= match_set
.Match(revision_info
, component_name
)
53 message
= revision_info
['message']
54 # TODO(jeun): Don't hold lock while issuing http request.
55 match
.ParseMessage(message
, matches
.codereview_api_url
)
57 # If this match is a revert, add to the set of CLs to be ignored.
59 matches
.cls_to_ignore
.add(revision_number
)
61 # If this match has info on what CL it is reverted from, add that CL.
63 matches
.cls_to_ignore
.add(match
.revert_of
)
67 matches
.matches
[revision_number
] = match
70 match
= matches
.matches
[revision_number
]
72 (diff_url
, changed_line_numbers
, changed_line_contents
) = (
73 repository_parser
.ParseLineDiff(
74 file_path
, component_path
, file_change_type
, revision_number
))
76 # Ignore this match if the component is not supported for svn.
80 # Find the intersection between the lines that this file crashed on and
82 (line_number_intersection
, stack_frame_index_intersection
, functions
) = (
83 crash_utils
.Intersection(
84 crashed_line_numbers
, stack_frame_indices
, changed_line_numbers
,
87 # Find the minimum distance between the changed lines and crashed lines.
88 (min_distance
, min_crashed_line
, min_changed_line
) = \
89 crash_utils
.FindMinLineDistance(crashed_line_numbers
,
92 # Check whether this CL changes the crashed lines or not.
93 if line_number_intersection
:
94 priority
= LINE_CHANGE_PRIORITY
96 priority
= FILE_CHANGE_PRIORITY
98 # Add the parsed information to the object.
99 with matches
.matches_lock
:
100 match
.crashed_line_numbers
.append(line_number_intersection
)
102 file_name
= file_path
.split('/')[-1]
103 match
.changed_files
.append(file_name
)
105 # Update the min distance only if it is less than the current one.
106 if min_distance
< match
.min_distance
:
107 match
.min_distance
= min_distance
108 match
.min_distance_info
= (file_name
, min_crashed_line
, min_changed_line
)
110 # If this CL does not change the crashed line, all occurrence of this
111 # file in the stack has the same priority.
112 if not stack_frame_index_intersection
:
113 stack_frame_index_intersection
= stack_frame_indices
115 match
.stack_frame_indices
.append(stack_frame_index_intersection
)
116 match
.changed_file_urls
.append(diff_url
)
117 match
.priorities
.append(priority
)
118 match
.function_list
.append(functions
)
121 def FindMatch(revisions_info_map
, file_to_revision_info
, file_to_crash_info
,
122 component_path
, component_name
, repository_parser
,
124 """Finds a CL that modifies file in the stacktrace.
127 revisions_info_map: A dictionary mapping revision number to the CL
129 file_to_revision_info: A dictionary mapping file to the revision that
131 file_to_crash_info: A dictionary mapping file to its occurrence in
133 component_path: The path of the component to search for.
134 component_name: The name of the component to search for.
135 repository_parser: The parser object to parse the line diff.
136 codereview_api_url: A code review url to retrieve data from.
139 Matches, a set of match objects.
141 matches
= match_set
.MatchSet(codereview_api_url
)
144 # Iterate through the crashed files in the stacktrace.
145 for crashed_file_path
in file_to_crash_info
:
146 # Ignore header file.
147 if crashed_file_path
.endswith('.h'):
150 # If the file in the stacktrace is not changed in any commits, continue.
151 for changed_file_path
in file_to_revision_info
:
152 changed_file_name
= changed_file_path
.split('/')[-1].lower()
153 crashed_file_name
= crashed_file_path
.split('/')[-1].lower()
154 if changed_file_name
!= crashed_file_name
:
157 if not crash_utils
.GuessIfSameSubPath(
158 changed_file_path
.lower(), crashed_file_path
.lower()):
161 crashed_line_numbers
= file_to_crash_info
.GetCrashedLineNumbers(
163 stack_frame_nums
= file_to_crash_info
.GetCrashStackFrameIndices(
165 functions
= file_to_crash_info
.GetCrashFunctions(crashed_file_path
)
167 # Iterate through the CLs that this file path is changed.
168 for (cl
, file_change_type
) in file_to_revision_info
[changed_file_path
]:
169 # If the file change is delete, ignore this CL.
170 if file_change_type
== 'D':
173 revision
= revisions_info_map
[cl
]
176 'function': GenerateMatchEntry
,
177 'args':[matches
, revision
, cl
, changed_file_path
, functions
,
178 component_path
, component_name
, crashed_line_numbers
,
179 stack_frame_nums
, file_change_type
,
183 crash_utils
.RunTasks(tasks
)
185 matches
.RemoveRevertedCLs()
190 def FindMatchForComponent(component_path
, file_to_crash_info
, changelog
,
191 callstack_priority
, results
, results_lock
):
192 """Parses changelog and finds suspected CLs for a given component.
195 component_path: The path of component to look for the culprit CL.
196 file_to_crash_info: A dictionary mapping file to its occurrence in
198 changelog: The parsed changelog for this component.
199 callstack_priority: The priority of this call stack, 0 if from crash stack,
200 1 if from freed, 2 if from previously allocated.
201 results: A dictionary to store the result.
202 results_lock: A lock that guards results.
204 (repository_parser
, component_name
, revisions
, file_to_revision_map
) = \
207 # Find match for this component.
208 codereview_api_url
= CONFIG
['codereview']['review_url']
209 component_result
= FindMatch(
210 revisions
, file_to_revision_map
, file_to_crash_info
, component_path
,
211 component_name
, repository_parser
, codereview_api_url
)
212 matches
= component_result
.matches
214 # For all the match results in a dictionary, add to the list so that it
219 results
.append((callstack_priority
, cl
, match
))
222 def FindMatchForCallstack(
223 callstack
, components
, component_to_changelog_map
, results
,
225 """Finds culprit cl for a stack within a stacktrace.
227 For each components to look for, create new thread that computes the matches
228 and join the results at the end.
231 callstack: A callstack in a stacktrace to find the result for.
232 components: A set of components to look for.
233 component_to_changelog_map: A map from component to its parsed changelog.
234 results: A list to aggregrate results from all stacktraces.
235 results_lock: A lock that guards results.
237 # Create component dictionary from the component and call stack.
238 component_dict
= component_dictionary
.ComponentDictionary(callstack
,
240 callstack_priority
= callstack
.priority
242 # Iterate through all components.
243 for component_path
in component_dict
:
244 # If the component to consider in this callstack is not in the parsed list
245 # of components, ignore this one.
246 if component_path
not in component_to_changelog_map
:
249 changelog
= component_to_changelog_map
[component_path
]
250 file_to_crash_info
= component_dict
.GetFileDict(component_path
)
251 FindMatchForComponent(component_path
, file_to_crash_info
, changelog
,
252 callstack_priority
, results
, results_lock
)
255 def FindMatchForStacktrace(stacktrace
, components
,
256 component_to_regression_dict
):
257 """Finds the culprit CL for stacktrace.
259 The passed stacktrace is either from release build stacktrace
260 or debug build stacktrace.
263 stacktrace: A list of parsed stacks within a stacktrace.
264 components: A set of components to look for.
265 component_to_regression_dict: A dictionary mapping component path to
269 A list of match results from all stacks.
271 # A list to aggregate results from all the callstacks in the stacktrace.
273 results_lock
= Lock()
276 svn_parser
= svn_repository_parser
.SVNParser(CONFIG
['svn'])
277 git_parser
= git_repository_parser
.GitParser(component_to_regression_dict
,
280 # Create a cache of parsed revisions.
281 component_to_changelog_map
= {}
282 for component_path
in components
:
283 component_object
= component_to_regression_dict
[component_path
]
284 range_start
= component_object
['old_revision']
285 range_end
= component_object
['new_revision']
287 # If range start is 0, the range is too large and the crash has been
288 # introduced the archived build, so ignore this case.
289 if range_start
== '0':
292 component_name
= component_to_regression_dict
[component_path
]['name']
294 is_git
= utils
.IsGitHash(range_start
)
296 repository_parser
= git_parser
298 repository_parser
= svn_parser
300 (revisions
, file_to_revision_map
) = repository_parser
.ParseChangelog(
301 component_path
, range_start
, range_end
)
303 # If the returned map from ParseChangeLog is empty, we don't need to look
304 # further because either the parsing failed or the changelog is empty.
305 if not (revisions
and file_to_revision_map
):
308 component_to_changelog_map
[component_path
] = (repository_parser
,
311 file_to_revision_map
)
313 # Analyze each of the call stacks in the stacktrace.
314 for callstack
in stacktrace
.stack_list
:
315 FindMatchForCallstack(callstack
, components
, component_to_changelog_map
,
316 results
, results_lock
)
321 def SortMatchesFunction(match_with_stack_priority
):
322 """A function to sort the match triple.
324 Currently, it sorts the list by:
325 1) The highest priority file change in the CL (changing crashed line is
326 higher priority than just changing the file).
327 2) The callstack this match is computed (crash stack, freed, allocation).
328 3) The minimum stack frame number of the changed file in the match.
329 4) The number of files this CL changes (higher the better).
330 5) The minimum distance between the lines that the CL changes and crashed
334 match_with_stack_priority: A match object, with the CL it is from and what
335 callstack it is from.
340 (stack_priority
, _
, match
) = match_with_stack_priority
342 return (min(match
.priorities
),
345 crash_utils
.FindMinStackFrameNumber(match
.stack_frame_indices
,
347 -len(match
.changed_files
))
350 def SortAndFilterMatches(matches
, num_important_frames
=5):
351 """Filters the list of potential culprit CLs to remove noise.
354 matches: A list containing match results.
355 num_important_frames: A number of frames on the top of the frame to Check
356 for when filtering the results. A match with a file
357 that is in top num_important_frames of the stacktrace
358 is regarded more probable then others.
361 Filtered match results.
365 is_important_frame
= False
366 highest_priority_stack
= crash_utils
.INFINITY
367 matches
.sort(key
=SortMatchesFunction
)
368 # Iterate through the matches to find out what results are significant.
369 for stack_priority
, cl
, match
in matches
:
370 # Check if the current match changes crashed line.
371 is_line_change
= (min(match
.priorities
) == LINE_CHANGE_PRIORITY
)
373 # Check which stack this match is from, and finds the highest priority
374 # callstack up to this point.
375 current_stack
= stack_priority
376 if current_stack
< highest_priority_stack
:
377 highest_priority_stack
= current_stack
379 # Check if current match changes a file that occurs in crash state.
380 flattened_stack_frame_indices
= [frame
for frame_indices
in
381 match
.stack_frame_indices
382 for frame
in frame_indices
]
383 current_is_important
= (
384 min(flattened_stack_frame_indices
) < num_important_frames
)
386 # This match and anything lower than this should be ignored if:
387 # - Current match does not change crashed lines but there are matches
389 # - Current match is not in crash state but there are matches in it.
390 # - There are other matches that came from higher priority stack.
391 if (line_changed
and not is_line_change
) or (
392 is_important_frame
and not current_is_important
) or (
393 current_stack
> highest_priority_stack
):
396 # Update the variables.
399 if current_is_important
:
400 is_important_frame
= True
402 # Add current match to the filtered result.
403 new_matches
.append((stack_priority
, cl
, match
))
408 def GenerateReasonForMatches(matches
):
409 """Generates a reason that a match (CL) is a culprit cl.
412 matches: A list of match objects.
414 # Iterate through the matches in the list.
415 for i
, _
, match
in matches
:
418 # Zip the files in the match by the reason they are suspected
419 # (how the file is modified).
420 match_by_priority
= zip(
421 match
.priorities
, match
.crashed_line_numbers
, match
.changed_files
,
422 match
.stack_frame_indices
, match
.function_list
)
424 # Sort the zipped changed files in the match by their priority so that the
425 # changed lines comes first in the reason.
426 match_by_priority
.sort(
427 key
=lambda (priority
, crashed_line_numbers
, file_name
,
428 stack_frame_indices
, function_list
): priority
)
430 # Iterate through the sorted match.
431 for i
in range(len(match_by_priority
)):
432 (priority
, crashed_line_numbers
, file_name
, stack_frame_indices
,
433 function_list
) = match_by_priority
[i
]
435 # If the file in the match is a line change, append a explanation.
436 if priority
== LINE_CHANGE_PRIORITY
:
437 crashed_line_numbers
= [crashed_line_number
438 for lines
in crashed_line_numbers
439 for crashed_line_number
in lines
]
441 'Lines %s of file %s which potentially caused crash '
442 'are changed in this cl (%s).\n' %
443 (utils
.JoinLineNumbers(crashed_line_numbers
, accepted_gap
=4),
445 crash_utils
.PrettifyFrameInfo(stack_frame_indices
, function_list
)))
448 # Get all the files that are not line change.
449 rest_of_the_files
= match_by_priority
[i
:]
451 if len(rest_of_the_files
) == 1:
452 file_string
= 'File %s is changed in this cl '
454 file_string
= 'Files %s are changed in this cl '
456 # Create a list of file names, and prettify the list.
458 file_name
for (_
, _
, file_name
, _
, _
) in rest_of_the_files
]
459 pretty_file_names
= crash_utils
.PrettifyList(file_names
)
461 # Add the reason, break because we took care of the rest of the files.
462 file_string
+= ('(and is part of stack %s)' %
463 crash_utils
.PrettifyFrameInfo(stack_frame_indices
, function_list
))
464 reason
.append(file_string
% pretty_file_names
)
467 # Set the reason as string.
468 match
.reason
= '\n'.join(reason
)
471 def CombineMatches(matches
):
472 """Combine possible duplicates in matches.
475 matches: A list of matches object, along with its callstack priority and
478 A combined list of matches.
480 combined_matches
= []
482 for stack_index
, cl
, match
in matches
:
485 # Iterate through the list of combined matches.
486 for _
, cl_combined
, match_combined
in combined_matches
:
487 # Check for if current CL is already higher up in the result.
488 if cl
== cl_combined
:
489 found_match
= match_combined
492 # If current match is not already in, add it to the list of matches.
494 combined_matches
.append((stack_index
, cl
, match
))
497 # Combine the reason if the current match is already in there.
498 found_match
.reason
+= '\n' + match
.reason
499 if match
.min_distance
< found_match
.min_distance
:
500 found_match
.min_distance
= match
.min_distance
501 found_match
.min_distance_info
= match
.min_distance_info
503 for stack_index
, cl
, match
in combined_matches
:
504 if match
.min_distance_info
:
505 file_name
, min_crashed_line
, min_changed_line
= match
.min_distance_info
506 match
.reason
= match
.reason
.strip()
508 '\nMinimum distance from crash line to modified line: %d. '
509 '(file: %s, crashed on: %d, modified: %d).' %
510 (match
.min_distance
, file_name
, min_crashed_line
, min_changed_line
))
512 return combined_matches
515 def FilterAndGenerateReasonForMatches(result
):
516 """A wrapper function.
518 It generates reasons for the matches and returns string representation
522 result: A list of match objects.
525 A string representation of filtered results.
527 new_result
= SortAndFilterMatches(result
)
528 GenerateReasonForMatches(new_result
)
529 combined_matches
= CombineMatches(new_result
)
530 return crash_utils
.MatchListToResultList(combined_matches
)
533 def ParseCrashComponents(main_stack
):
534 """Parses the crashing component.
536 Crashing components is a component that top_n_frames of the stacktrace is
540 main_stack: Main stack from the stacktrace.
547 for frame
in main_stack
.frame_list
:
548 components
.add(frame
.component_path
)
553 def GenerateAndFilterBlameList(callstack
, component_to_crash_revision_dict
,
554 component_to_regression_dict
):
555 """A wrapper function.
557 Finds blame information for stack and returns string representation.
560 callstack: A callstack to find the blame information.
561 component_to_crash_revision_dict: A dictionary mapping component to its
563 component_to_regression_dict: A dictionary mapping component to its
567 A list of blame results.
569 if component_to_regression_dict
:
570 parsed_deps
= component_to_regression_dict
572 parsed_deps
= component_to_crash_revision_dict
574 # Setup parser objects to use for parsing blame information.
575 svn_parser
= svn_repository_parser
.SVNParser(CONFIG
['svn'])
576 git_parser
= git_repository_parser
.GitParser(parsed_deps
, CONFIG
['git'])
578 parsers
['svn'] = svn_parser
579 parsers
['git'] = git_parser
581 # Create and generate the blame objects from the callstack.
582 blame_list
= blame
.BlameList()
583 blame_list
.FindBlame(callstack
, component_to_crash_revision_dict
,
584 component_to_regression_dict
,
587 blame_list
.FilterAndSortBlameList()
588 return crash_utils
.BlameListToResultList(blame_list
)
591 def FindItForCrash(stacktrace_list
,
593 component_to_regression_dict
,
594 component_to_crash_revision_dict
):
595 """Finds the culprit CL from the list of stacktrace.
598 stacktrace_list: A list of stacktraces to look for, in the order of
599 decreasing significance.
600 callstack: A callstack object to show blame information for, if there are
601 no results for all stacktraces in the stacktrace_list.
602 component_to_regression_dict: A parsed regression information as a
603 result of parsing DEPS file.
604 component_to_crash_revision_dict: A parsed crash revision information.
607 A list of result objects, with the message how the result is created.
609 # If regression information is not available, return blame information.
610 if not component_to_regression_dict
:
611 result
= GenerateAndFilterBlameList(callstack
,
612 component_to_crash_revision_dict
,
613 component_to_regression_dict
)
616 'Regression information is not available. The result is '
617 'the blame information.')
619 return_message
= ('Findit could not find any suspected CLs.')
621 return (return_message
, result
)
623 for stacktrace
in stacktrace_list
:
624 # Check the next stacktrace if current one is empty.
625 if not stacktrace
.stack_list
:
628 # Get the crash stack for this stacktrace, and extract crashing components
630 main_stack
= stacktrace
.GetCrashStack()
631 components
= ParseCrashComponents(main_stack
)
633 result_for_stacktrace
= FindMatchForStacktrace(
634 stacktrace
, components
, component_to_regression_dict
)
635 filtered_result
= FilterAndGenerateReasonForMatches(result_for_stacktrace
)
637 # If the result is empty, check the next stacktrace. Else, return the
639 if not filtered_result
:
643 'The result is a list of CLs that change the crashed files.')
644 return (return_message
, filtered_result
)
646 # If no match is found, return the blame information for the input
648 result
= GenerateAndFilterBlameList(
649 callstack
, component_to_crash_revision_dict
,
650 component_to_regression_dict
)
654 'No CL in the regression range changes the crashed files. '
655 'The result is the blame information.')
657 # When findit could not find any CL that changes file in stacktrace or if
658 # if cannot get any blame information, return a message saying that no
659 # results are available.
661 return_message
= ('Findit could not find any suspected CLs.')
663 return (return_message
, result
)