Give names to all utility processes.
[chromium-blink-merge.git] / tools / findit / crash_utils.py
blob7e8011334e245959babd2f324f8f35912da685d8
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 import atexit
6 import cgi
7 import ConfigParser
8 import json
9 import os
10 import Queue
11 import threading
12 import time
14 from common import utils
15 from result import Result
18 INFINITY = float('inf')
20 MAX_THREAD_NUMBER = 10
21 TASK_QUEUE = None
24 def SignalWorkerThreads():
25 global TASK_QUEUE
26 if not TASK_QUEUE:
27 return
29 for i in range(MAX_THREAD_NUMBER):
30 TASK_QUEUE.put(None)
32 # Give worker threads a chance to exit.
33 # Workaround the harmless bug in python 2.7 below.
34 time.sleep(1)
37 atexit.register(SignalWorkerThreads)
40 def Worker():
41 global TASK_QUEUE
42 while True:
43 try:
44 task = TASK_QUEUE.get()
45 if not task:
46 return
47 except TypeError:
48 # According to http://bugs.python.org/issue14623, this is a harmless bug
49 # in python 2.7 which won't be fixed.
50 # The exception is raised on daemon threads when python interpreter is
51 # shutting down.
52 return
54 function, args, kwargs, result_semaphore = task
55 try:
56 function(*args, **kwargs)
57 except:
58 pass
59 finally:
60 # Signal one task is done in case of exception.
61 result_semaphore.release()
64 def RunTasks(tasks):
65 """Run given tasks. Not thread-safe: no concurrent calls of this function.
67 Return after all tasks were completed. A task is a dict as below:
69 'function': the function to call,
70 'args': the positional argument to pass to the function,
71 'kwargs': the key-value arguments to pass to the function,
73 """
74 if not tasks:
75 return
77 global TASK_QUEUE
78 if not TASK_QUEUE:
79 TASK_QUEUE = Queue.Queue()
80 for index in range(MAX_THREAD_NUMBER):
81 thread = threading.Thread(target=Worker, name='worker_%s' % index)
82 # Set as daemon, so no join is needed.
83 thread.daemon = True
84 thread.start()
86 result_semaphore = threading.Semaphore(0)
87 # Push task to task queue for execution.
88 for task in tasks:
89 TASK_QUEUE.put(
90 (task['function'], task.get('args', []),
91 task.get('kwargs', {}), result_semaphore))
93 # Wait until all tasks to be executed.
94 for _ in tasks:
95 result_semaphore.acquire()
98 def GetRepositoryType(revision_number):
99 """Returns the repository type of this revision number.
101 Args:
102 revision_number: A revision number or git hash.
104 Returns:
105 'git' or 'svn', depending on the revision_number.
107 if utils.IsGitHash(revision_number):
108 return 'git'
109 else:
110 return 'svn'
113 def ParseURLsFromConfig(file_name):
114 """Parses URLS from the config file.
116 The file should be in python config format, where svn section is in the
117 format "svn:component_path".
118 Each of the section for svn should contain changelog_url, revision_url,
119 diff_url and blame_url.
121 Args:
122 file_name: The name of the file that contains URL information.
124 Returns:
125 A dictionary that maps repository type to list of URLs. For svn, it maps
126 key 'svn' to another dictionary, which maps component path to the URLs
127 as explained above. For git, it maps to the URLs as explained above.
129 config = ConfigParser.ConfigParser()
131 # Get the absolute path of the config file, and read the file. If it fails,
132 # return none.
133 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
134 file_name)
135 config.read(config_file_path)
136 if not config:
137 return None
139 # Iterate through the config file, check for sections.
140 config_dict = {}
141 for section in config.sections():
142 # These two do not need another layer of dictionary, so add it and go
143 # to next section.
144 if ':' not in section:
145 for option in config.options(section):
146 if section not in config_dict:
147 config_dict[section] = {}
149 url = config.get(section, option)
150 config_dict[section][option] = url
152 continue
154 # Get repository type and component name from the section name.
155 repository_type_and_component = section.split(':')
156 repository_type = repository_type_and_component[0]
157 component_path = repository_type_and_component[1]
159 # Add 'svn' as the key, if it is not already there.
160 if repository_type not in config_dict:
161 config_dict[repository_type] = {}
162 url_map_for_repository = config_dict[repository_type]
164 # Add the path to the 'svn', if it is not already there.
165 if component_path not in url_map_for_repository:
166 url_map_for_repository[component_path] = {}
167 type_to_url = url_map_for_repository[component_path]
169 # Add all URLs to this map.
170 for option in config.options(section):
171 url = config.get(section, option)
172 type_to_url[option] = url
174 return config_dict
177 def NormalizePath(path, parsed_deps):
178 """Normalizes the path.
180 Args:
181 path: A string representing a path.
182 parsed_deps: A map from component path to its component name, repository,
183 etc.
185 Returns:
186 A tuple containing a component this path is in (e.g blink, skia, etc)
187 and a path in that component's repository. Returns None if the component
188 repository is not supported, i.e from googlecode.
190 # First normalize the path by retreiving the normalized path.
191 normalized_path = os.path.normpath(path).replace('\\', '/')
193 # Iterate through all component paths in the parsed DEPS, in the decreasing
194 # order of the length of the file path.
195 for component_path in sorted(parsed_deps,
196 key=(lambda path: -len(path))):
197 # new_component_path is the component path with 'src/' removed.
198 new_component_path = component_path
199 if new_component_path.startswith('src/') and new_component_path != 'src/':
200 new_component_path = new_component_path[len('src/'):]
202 # We need to consider when the lowercased component path is in the path,
203 # because syzyasan build returns lowercased file path.
204 lower_component_path = new_component_path.lower()
206 # If this path is the part of file path, this file must be from this
207 # component.
208 if new_component_path in normalized_path or \
209 lower_component_path in normalized_path:
211 # Case when the retreived path is in lowercase.
212 if lower_component_path in normalized_path:
213 current_component_path = lower_component_path
214 else:
215 current_component_path = new_component_path
217 # Normalize the path by stripping everything off the component's relative
218 # path.
219 normalized_path = normalized_path.split(current_component_path, 1)[1]
220 lower_normalized_path = normalized_path.lower()
222 # Add 'src/' or 'Source/' at the front of the normalized path, depending
223 # on what prefix the component path uses. For example, blink uses
224 # 'Source' but chromium uses 'src/', and blink component path is
225 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the
226 # normalized path.
227 if (lower_component_path == 'src/third_party/webkit/source' and
228 not lower_normalized_path.startswith('source/')):
229 normalized_path = (current_component_path.split('/')[-2] + '/' +
230 normalized_path)
232 component_name = parsed_deps[component_path]['name']
234 return (component_path, component_name, normalized_path)
236 # If the path does not match any component, default to chromium.
237 return ('src/', 'chromium', normalized_path)
240 def SplitRange(regression):
241 """Splits a range as retrieved from clusterfuzz.
243 Args:
244 regression: A string in format 'r1234:r5678'.
246 Returns:
247 A list containing two numbers represented in string, for example
248 ['1234','5678'].
250 if not regression:
251 return None
253 revisions = regression.split(':')
255 # If regression information is not available, return none.
256 if len(revisions) != 2:
257 return None
259 range_start = revisions[0]
260 range_end = revisions[1]
262 # Strip 'r' off the range start/end. Not using lstrip to avoid the case when
263 # the range is in git hash and it starts with 'r'.
264 if range_start.startswith('r'):
265 range_start = range_start[1:]
267 if range_end.startswith('r'):
268 range_end = range_end[1:]
270 return [range_start, range_end]
273 def LoadJSON(json_string):
274 """Loads json object from string, or None.
276 Args:
277 json_string: A string to get object from.
279 Returns:
280 JSON object if the string represents a JSON object, None otherwise.
282 try:
283 data = json.loads(json_string)
284 except ValueError:
285 data = None
287 return data
290 def GetDataFromURL(url):
291 """Retrieves raw data from URL, tries 10 times.
293 Args:
294 url: URL to get data from.
295 retries: Number of times to retry connection.
297 Returns:
298 None if the data retrieval fails, or the raw data.
300 status_code, data = utils.GetHttpClient().Get(url, retries=10)
301 if status_code == 200:
302 return data
303 else:
304 # Return None if it fails to read data.
305 return None
308 def FindMinLineDistance(crashed_line_list, changed_line_numbers,
309 line_range=3):
310 """Calculates how far the changed line is from one of the crashes.
312 Finds the minimum distance between the lines that the file crashed on
313 and the lines that the file changed. For example, if the file crashed on
314 line 200 and the CL changes line 203,204 and 205, the function returns 3.
316 Args:
317 crashed_line_list: A list of lines that the file crashed on.
318 changed_line_numbers: A list of lines that the file changed.
319 line_range: Number of lines to look back for.
321 Returns:
322 The minimum distance. If either of the input lists is empty,
323 it returns inf.
326 min_distance = INFINITY
327 crashed_line = -1
328 changed_line = -1
330 crashed_line_numbers = set()
331 for crashed_line_range in crashed_line_list:
332 for crashed_line in crashed_line_range:
333 for line in range(crashed_line - line_range, crashed_line + 1):
334 crashed_line_numbers.add(line)
336 for line in crashed_line_numbers:
337 for distance in changed_line_numbers:
338 # Find the current distance and update the min if current distance is
339 # less than current min.
340 current_distance = abs(line - distance)
341 if current_distance < min_distance:
342 min_distance = current_distance
343 crashed_line = line
344 changed_line = distance
346 return (min_distance, crashed_line, changed_line)
349 def GuessIfSameSubPath(path1, path2):
350 """Guesses if two paths represent same path.
352 Compares the name of the folders in the path (by split('/')), and checks
353 if they match either more than 3 or min of path lengths.
355 Args:
356 path1: First path.
357 path2: Second path to compare.
359 Returns:
360 True if it they are thought to be a same path, False otherwise.
362 path1 = path1.split('/')
363 path2 = path2.split('/')
365 intersection = set(path1).intersection(set(path2))
366 return len(intersection) >= (min(3, min(len(path1), len(path2))))
369 def FindMinStackFrameNumber(stack_frame_indices, priorities):
370 """Finds the minimum stack number, from the list of stack numbers.
372 Args:
373 stack_frame_indices: A list of lists containing stack position.
374 priorities: A list of of priority for each file.
376 Returns:
377 Inf if stack_frame_indices is empty, minimum stack number otherwise.
379 # Get the indexes of the highest priority (or low priority number).
380 highest_priority = min(priorities)
381 highest_priority_indices = []
382 for i in range(len(priorities)):
383 if priorities[i] == highest_priority:
384 highest_priority_indices.append(i)
386 # Gather the list of stack frame numbers for the files that change the
387 # crash lines.
388 flattened = []
389 for i in highest_priority_indices:
390 flattened += stack_frame_indices[i]
392 # If no stack frame information is available, return inf. Else, return min.
393 if not flattened:
394 return INFINITY
395 else:
396 return min(flattened)
399 def AddHyperlink(text, link):
400 """Returns a string with HTML link tag.
402 Args:
403 text: A string to add link.
404 link: A link to add to the string.
406 Returns:
407 A string with hyperlink added.
409 sanitized_link = cgi.escape(link, quote=True)
410 sanitized_text = cgi.escape(str(text))
411 return '<a href="%s">%s</a>' % (sanitized_link, sanitized_text)
414 def PrettifyList(items):
415 """Returns a string representation of a list.
417 It adds comma in between the elements and removes the brackets.
418 Args:
419 items: A list to prettify.
420 Returns:
421 A string representation of the list.
423 return ', '.join(map(str, items))
426 def PrettifyFrameInfo(frame_indices, functions):
427 """Return a string to represent the frames with functions."""
428 frames = []
429 for frame_index, function in zip(frame_indices, functions):
430 frames.append('frame #%s, "%s"' % (frame_index, function.split('(')[0]))
431 return '; '.join(frames)
434 def PrettifyFiles(file_list):
435 """Returns a string representation of a list of file names.
437 Args:
438 file_list: A list of tuple, (file_name, file_url).
439 Returns:
440 A string representation of file names with their urls.
442 ret = ['\n']
443 for file_name, file_url in file_list:
444 ret.append(' %s\n' % AddHyperlink(file_name, file_url))
445 return ''.join(ret)
448 def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,
449 function, line_range=3):
450 """Finds the overlap betwee changed lines and crashed lines.
452 Finds the intersection of the lines that caused the crash and
453 lines that the file changes. The intersection looks within 3 lines
454 of the line that caused the crash.
456 Args:
457 crashed_line_list: A list of lines that the file crashed on.
458 stack_frame_index: A list of positions in stack for each of the lines.
459 changed_line_numbers: A list of lines that the file changed.
460 function: A list of functions that the file crashed on.
461 line_range: Number of lines to look backwards from crashed lines.
463 Returns:
464 line_number_intersection: Intersection between crashed_line_list and
465 changed_line_numbers.
466 stack_frame_index_intersection: Stack number for each of the intersections.
468 line_number_intersection = []
469 stack_frame_index_intersection = []
470 function_intersection = []
472 # Iterate through the crashed lines, and its occurence in stack.
473 for (lines, stack_frame_index, function_name) in zip(
474 crashed_line_list, stack_frame_index, function):
475 # Also check previous 'line_range' lines. Create a set of all changed lines
476 # and lines within 3 lines range before the crashed line.
477 line_minus_n = set()
478 for line in lines:
479 for line_in_range in range(line - line_range, line + 1):
480 line_minus_n.add(line_in_range)
482 for changed_line in changed_line_numbers:
483 # If a CL does not change crahsed line, check next line.
484 if changed_line not in line_minus_n:
485 continue
487 intersected_line = set()
488 # If the changed line is exactly the crashed line, add that line.
489 for line in lines:
490 if line in changed_line_numbers:
491 intersected_line.add(line)
493 # If the changed line is in 3 lines of the crashed line, add the line.
494 else:
495 intersected_line.add(changed_line)
497 # Avoid adding the same line twice.
498 if intersected_line not in line_number_intersection:
499 line_number_intersection.append(list(intersected_line))
500 stack_frame_index_intersection.append(stack_frame_index)
501 function_intersection.append(function_name)
502 break
504 return (line_number_intersection, stack_frame_index_intersection,
505 function_intersection)
508 def MatchListToResultList(matches):
509 """Convert list of matches to the list of result objects.
511 Args:
512 matches: A list of match objects along with its stack priority and revision
513 number/git hash
514 Returns:
515 A list of result object.
518 result_list = []
520 for _, cl, match in matches:
521 suspected_cl = cl
522 revision_url = match.revision_url
523 component_name = match.component_name
524 author = match.author
525 reason = match.reason.strip()
526 review_url = match.review_url
527 reviewers = match.reviewers
528 # For matches, line content do not exist.
529 line_content = None
530 message = match.message
531 time = match.time
533 result = Result(suspected_cl, revision_url, component_name, author, reason,
534 review_url, reviewers, line_content, message, time)
535 result_list.append(result)
537 return result_list
540 def BlameListToResultList(blame_list):
541 """Convert blame list to the list of result objects.
543 Args:
544 blame_list: A list of blame objects.
546 Returns:
547 A list of result objects.
549 result_list = []
551 for blame in blame_list:
552 suspected_cl = blame.revision
553 revision_url = blame.url
554 component_name = blame.component_name
555 author = blame.author
556 reason = (
557 'The CL last changed line %s of file %s, which is stack frame %d.' %
558 (blame.line_number, blame.file, blame.stack_frame_index))
559 time = blame.time
560 # Blame object does not have review url and reviewers.
561 review_url = None
562 reviewers = None
563 line_content = blame.line_content
564 message = blame.message
566 result = Result(suspected_cl, revision_url, component_name, author, reason,
567 review_url, reviewers, line_content, message, time)
568 result_list.append(result)
570 return result_list