# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import copy
import datetime
import logging
import os
import re
import time

from lib.dump import Dump
from lib.exceptions import EmptyDumpException, InvalidDumpException
from lib.exceptions import ObsoleteDumpVersionException, ParsingException
from lib.pageframe import PageFrame
from lib.range_dict import ExclusiveRangeDict
from lib.symbol import procfs

LOGGER = logging.getLogger('dmprof')

# Indices of the whitespace-separated fields in a stacktrace line.
VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)

# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2 and later distinguish mmap regions from malloc chunks.
# DUMP_DEEP_3 and later don't include allocation functions in their stack
# dumps.
# DUMP_DEEP_4 and later support comments with '#' and global stats
# "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
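
# For orientation, a DUMP_DEEP_6 dump is laid out roughly as sketched below.
# This is an illustrative reconstruction from the parsing code in this file,
# not a normative specification; all values are hypothetical.
#   heap profile: DUMP_DEEP_6
#   META:
#     ... meta lines such as 'Time:' and 'PageSize:' ...
#   MMAP_LIST:
#     ... /proc/maps entries, ' PF: ' lines and region lines ...
#   GLOBAL_STATS:
#     ... '<name> <virtual> <committed>' lines ...
#   STACKTRACES:
#     ... '<virtual> <committed> <allocs> <frees> @ <bucket_id> ...' lines ...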


class DeepDump(Dump):
  """Represents a heap profile dump."""

  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')

  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               r'(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 r'(?P<RESERVED>[0-9]+)')

  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
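
  # Region lines in a MMAP_LIST section look roughly like the following
  # illustrative sketch, inferred from _HOOK_PATTERN and the patterns above
  # (addresses, sizes and the 'anonymous' type are hypothetical).  A
  # parenthesized address means it is truncated at the containing /proc/maps
  # entry and is resolved from the current VMA in _parse_mmap_list:
  #    7f5300000000 - 7f5300002000   hooked anonymous 8192 / 8192 @ 42
  #   (7f5300002000)- 7f5300003000   unhooked anonymous 4096 / 4096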

  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
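
  # The two 'Time:' forms accepted in META sections, as illustrative examples
  # (the values are hypothetical):
  #   Time: 2014/05/01 12:34:56.789
  #   Time: 1400000000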

  def __init__(self, path, modified_time):
    super(DeepDump, self).__init__()
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

    self._version = ''
    self._lines = []

  @property
  def path(self):
    return self._path

  @property
  def count(self):
    return self._count

  @property
  def time(self):
    return self._time

  @property
  def iter_map(self):
    for region in sorted(self._map.iteritems()):
      yield region[0], region[1]
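
  # A stacktrace line consists of whitespace-separated fields indexed by the
  # VIRTUAL..BUCKET_ID constants above.  An illustrative sketch with
  # hypothetical values:
  #   1024 512 10 2 @ 37
  # means 1024 virtual bytes and 512 committed bytes in bucket 37, with 10
  # allocations and 2 frees.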

  @property
  def iter_stacktrace(self):
    for line in self._stacktrace_lines:
      words = line.split()
      yield (int(words[BUCKET_ID]),
             int(words[VIRTUAL]),
             int(words[COMMITTED]),
             int(words[ALLOC_COUNT]),
             int(words[FREE_COUNT]))

  def global_stat(self, name):
    return self._global_stats[name]

  @property
  def run_id(self):
    return self._run_id

  @property
  def pagesize(self):
    return self._pagesize

  @property
  def pageframe_length(self):
    return self._pageframe_length

  @property
  def pageframe_encoding(self):
    return self._pageframe_encoding

  @property
  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
        path: A file path string to load.
        log_header: A preceding string for log messages.

    Returns:
        A loaded DeepDump object.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    dump = DeepDump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump
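
  # A minimal usage sketch.  The file name is hypothetical; it has to match
  # _PATH_PATTERN, i.e. '<prefix>.<pid>.<count>.heap':
  #   dump = DeepDump.load('dmprof.12345.0001.heap')
  #   for bucket_id, virtual, committed, allocs, frees in dump.iter_stacktrace:
  #     print bucket_id, virtual, committed, allocs, frees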

  def load_file(self, f, log_header):
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
    except ParsingException as e:
      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
      raise
    else:
      LOGGER.info('%s%s (version:%s)' %
                  (log_header, self._path, self._version))

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
        A pair of (a string representing a version of the stacktrace dump,
        and an integer indicating a line number next to the version string).

    Raises:
        ParsingException for invalid dump versions.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats."""
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']
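
    # Each matching stats line is expected to end with two numeric fields,
    # e.g. 'total 1234567 765432' (hypothetical values), read below as the
    # virtual and committed sizes respectively.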
    for prefix in global_stat_names:
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    while True:
      if self._lines[ln].startswith('Time:'):
        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
        if matched_format:
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            self._time += float(matched_format.group(2)[1:]) / 1000.0
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif self._lines[ln].startswith('Reason:'):
        pass  # Nothing to do for 'Reason:'
      elif self._lines[ln].startswith('PageSize: '):
        self._pagesize = int(self._lines[ln][10:])
      elif self._lines[ln].startswith('CommandLine:'):
        pass  # Nothing to do for 'CommandLine:'
      elif (self._lines[ln].startswith('PageFrame: ') or
            self._lines[ln].startswith('PFN: ')):
        if self._lines[ln].startswith('PageFrame: '):
          words = self._lines[ln][11:].split(',')
        else:
          words = self._lines[ln][5:].split(',')
        for word in words:
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif self._lines[ln].startswith('RunID: '):
        self._run_id = self._lines[ln][7:].strip()
      elif (self._lines[ln].startswith('MMAP_LIST:') or
            self._lines[ln].startswith('GLOBAL_STATS:')):
        # The META section ends at "MMAP_LIST:" or "GLOBAL_STATS:".
        break
      else:
        pass  # Skip unknown meta lines.
      ln += 1
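
  # A META section handled above might look like the following illustrative
  # sketch (all values are hypothetical):
  #   META:
  #   Time: 2014/05/01 12:34:56.789
  #   Reason: periodic
  #   PageSize: 4096
  #   PageFrame: 24,Base64,PageCount
  #   RunID: abcdef
  #   GLOBAL_STATS: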

  def _parse_mmap_list(self):
    """Parses lines in self._lines as a mmap list."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return {}

    ln += 1
    self._map = {}
    current_vma = {}
    pageframe_list = []
    while True:
      entry = procfs.ProcMaps.parse_line(self._lines[ln])
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().iteritems():
            attr[key] = value
            current_vma[key] = value
        ln += 1
        continue

      if self._lines[ln].startswith(' PF: '):
        for pageframe in self._lines[ln][5:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        ln += 1
        continue

      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # Groups in _HOOK_PATTERN:
      #   2: starting address
      #   5: end address
      #   7: hooked or unhooked
      #   8: additional information
      if matched.group(7) == 'hooked':
        submatched = self._HOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
      elif matched.group(7) == 'unhooked':
        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
      else:
        assert matched.group(7) in ['hooked', 'unhooked']

      submatched_dict = submatched.groupdict()
      region_info = {'vma': current_vma}
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      if submatched_dict.get('COMMITTED'):
        region_info['committed'] = int(submatched_dict['COMMITTED'])
      if submatched_dict.get('RESERVED'):
        region_info['reserved'] = int(submatched_dict['RESERVED'])
      if submatched_dict.get('BUCKETID'):
        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      # Trim partially-covered first/last page frames to the region bounds.
      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

      self._map[(start, end)] = (matched.group(7), region_info)
      ln += 1
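
  # After _parse_mmap_list, self._map maps (start, end) address pairs to
  # ('hooked'|'unhooked', region_info) tuples, where region_info holds 'vma',
  # 'pageframe' and, when matched, 'type', 'committed', 'reserved' and
  # 'bucket_id'.  iter_map above yields these regions in address order.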

  def _extract_stacktrace_lines(self, line_number):
    """Extracts the position of stacktrace lines.

    Valid stacktrace lines are stored into self._stacktrace_lines.

    Args:
        line_number: A line number to start parsing in lines.

    Raises:
        ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: not self._lines[n].split()[0].isdigit())
      stacktrace_start = line_number
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:line_number]

    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)

    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given stacktrace_line is valid as stacktrace.

    Args:
        stacktrace_line: A string to be checked.

    Returns:
        True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    if words[BUCKET_ID - 1] != '@':
      return False
    return True


class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  _DUMMY_ENTRY = procfs.ProcMapsEntry(
      0,     # begin
      0,     # end
      '-',   # readable
      '-',   # writable
      '-',   # executable
      '-',   # private
      0,     # offset
      '00',  # major
      '00',  # minor
      0,     # inode
      ''     # name
      )

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    if key not in self._entry:
      raise KeyError(key)
    self._entry[key] = value

  def copy(self):
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
    return new_entry


def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) is False.

  Returns:
      A pair of an integer indicating a line number after skipping, and a
      boolean value which is True if a line for which skipping_condition is
      False was found.
  """
  while skipping_condition(index):
    index += 1
    if index >= max_index:
      return index, False
  return index, True
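
# Illustrative behavior of skip_while, computed from the definition above:
#   skip_while(0, 5, lambda n: n < 2)  returns (2, True)
#   skip_while(0, 3, lambda n: True)   returns (3, False)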