1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
12 from lib
.dump
import Dump
13 from lib
.exceptions
import EmptyDumpException
, InvalidDumpException
14 from lib
.exceptions
import ObsoleteDumpVersionException
, ParsingException
15 from lib
.pageframe
import PageFrame
16 from lib
.range_dict
import ExclusiveRangeDict
17 from lib
.symbol
import procfs
# Module-wide logger shared by the dump-parsing code below.
LOGGER = logging.getLogger('dmprof')

# Column indexes of a stacktrace line in a heap profile dump (see
# iter_stacktrace, which reads words[COMMITTED] .. words[BUCKET_ID]).
VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)
# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinct mmap regions and malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
# NOTE(review): this is the interior of a class whose "class ...:" header is
# outside this chunk (the super() call in __init__ below names DeepDump).
"""Represents a heap profile dump."""

# Matches dump file paths of the form "<prefix>.<pid>.<count>.heap".
_PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')

# Matches one mmap-list region line:
# "(start)-(end) hooked|unhooked <additional info>".
_HOOK_PATTERN = re.compile(
    r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
    r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

# Additional-info formats for hooked/unhooked regions (current format).
_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                             '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               '(?P<RESERVED>[0-9]+)')

# Additional-info formats used by older dumps.
_OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
_OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')

# "Time:" meta lines: formatted datetime (with optional milliseconds),
# or plain seconds.
_TIME_PATTERN_FORMAT = re.compile(
    r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
_TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
def __init__(self, path, modified_time):
  """Initializes a dump from |path|; contents are parsed later by load_file.

  Args:
      path: A dump file path string; must match _PATH_PATTERN.
      modified_time: Last-modification time of the dump file.
  """
  # NOTE(review): a few attribute initializations from the original file
  # are elided from this chunk (numbering gaps at orig. 71, 76, 80-82).
  super(DeepDump, self).__init__()
  matched = self._PATH_PATTERN.match(path)
  self._pid = int(matched.group(2))     # second path component: process id
  self._count = int(matched.group(3))   # third path component: dump count
  self._time = modified_time
  self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
  self._stacktrace_lines = []
  self._global_stats = {}  # used only in apply_policy
  # Pageframe meta information; overwritten by _parse_meta_information.
  self._pageframe_length = 0
  self._pageframe_encoding = ''
  self._has_pagecount = False
# NOTE(review): body of an iterator method whose "def" line is elided from
# this chunk; yields (range-key, value) pairs from self._map in sorted
# order (Python 2 dict.iteritems).
for region in sorted(self._map.iteritems()):
  yield region[0], region[1]
def iter_stacktrace(self):
  """Yields numeric fields of each stored stacktrace line."""
  for line in self._stacktrace_lines:
    # NOTE(review): the line defining |words| (presumably splitting |line|)
    # is elided from this chunk -- confirm against the full file.
    yield (int(words[BUCKET_ID]),
           int(words[COMMITTED]),
           int(words[ALLOC_COUNT]),
           int(words[FREE_COUNT]))
def global_stat(self, name):
  """Returns the global stat value recorded under |name|.

  Args:
      name: A key string of self._global_stats (filled by
          _parse_global_stats).
  """
  stats = self._global_stats
  return stats[name]
# NOTE(review): body of an accessor (its "def" line -- probably a
# @property for the page size -- is elided from this chunk).
return self._pagesize
def pageframe_length(self):
  """Accessor for the pageframe length recorded from META information."""
  recorded_length = self._pageframe_length
  return recorded_length
def pageframe_encoding(self):
  """Accessor for the pageframe encoding ('' or 'base64') from META info."""
  recorded_encoding = self._pageframe_encoding
  return recorded_encoding
def has_pagecount(self):
  """Accessor for whether META information declared PageCount."""
  pagecount_flag = self._has_pagecount
  return pagecount_flag
def load(path, log_header='Loading a heap profile dump: '):
  """Loads a heap profile dump.

  Args:
      path: A file path string to load.
      log_header: A preceding string for log messages.

  Returns:
      A loaded Dump object.

  Raises:
      ParsingException for invalid heap profile dumps.
  """
  # NOTE(review): probably a @staticmethod; its decorator and the trailing
  # "return dump" are elided from this chunk.
  dump = Dump(path, os.stat(path).st_mtime)
  with open(path, 'r') as f:
    dump.load_file(f, log_header)
def load_file(self, f, log_header):
  """Parses a heap profile dump read from file object |f| into this object.

  Args:
      f: An opened file object of a dump file.
      log_header: A preceding string for log messages.
  """
  # Keep only lines that are non-empty and not '#' comments.
  self._lines = [line for line in f
                 if line and not line.startswith('#')]
  # NOTE(review): the enclosing "try:" for the except clauses below (and
  # any re-raise/else handling) is elided from this chunk.
  self._version, ln = self._parse_version()
  self._parse_meta_information()
  # The mmap list section exists only from version DUMP_DEEP_6 on.
  if self._version == DUMP_DEEP_6:
    self._parse_mmap_list()
  self._parse_global_stats()
  self._extract_stacktrace_lines(ln)
  except EmptyDumpException:
    LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
  except ParsingException, e:
    LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
  LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
def _parse_version(self):
  """Parses a version string in self._lines.

  Returns:
      A pair of (a string representing a version of the stacktrace dump,
      and an integer indicating a line number next to the version string).

  Raises:
      ParsingException for invalid dump versions.
  """
  # NOTE(review): several guard lines (the empty-dump check before the
  # EmptyDumpException raise, "if not found" before the
  # InvalidDumpException raise, an "else:" before the final version raise,
  # and the return statement) are elided from this chunk.

  # Skip until an identifiable line.
  headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
  raise EmptyDumpException('Empty heap dump file.')
  (ln, found) = skip_while(
      lambda n: not self._lines[n].startswith(headers))
  raise InvalidDumpException('No version header.')

  # Identify a version.
  if self._lines[ln].startswith('heap profile: '):
    version = self._lines[ln][13:].strip()
    if version in (DUMP_DEEP_5, DUMP_DEEP_6):
      # Advance to the STACKTRACES section for the supported versions.
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n] != 'STACKTRACES:\n')
    elif version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(version)
    raise InvalidDumpException('Invalid version: %s' % version)
  elif self._lines[ln] == 'STACKTRACES:\n':
    # A bare STACKTRACES header means the oldest format.
    raise ObsoleteDumpVersionException(DUMP_DEEP_1)
  elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
    raise ObsoleteDumpVersionException(DUMP_DEEP_2)
def _parse_global_stats(self):
  """Parses lines in self._lines as global stats."""
  # NOTE(review): the first skip_while call's leading arguments and the
  # line-advance ("ln += 1") statements are elided from this chunk.
  (ln, _) = skip_while(
      lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

  global_stat_names = [
      'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
      'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
      'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
      'nonprofiled-stack', 'nonprofiled-other',
      'profiled-mmap', 'profiled-malloc']

  for prefix in global_stat_names:
    # Find the stat line whose first word is |prefix| and record its last
    # two columns as the virtual/committed sizes.
    (ln, _) = skip_while(
        ln, len(self._lines),
        lambda n: self._lines[n].split()[0] != prefix)
    words = self._lines[ln].split()
    self._global_stats[prefix + '_virtual'] = int(words[-2])
    self._global_stats[prefix + '_committed'] = int(words[-1])
def _parse_meta_information(self):
  """Parses lines in self._lines for meta information."""
  # NOTE(review): several guard/loop lines are elided from this chunk
  # (e.g. the "if matched_format:" guard, the CommandLine branch body, the
  # "for word in words:" loop header with its first condition, "else:"
  # branches, line advances and the loop exit); indentation below is
  # therefore approximate.
  (ln, found) = skip_while(
      lambda n: self._lines[n] != 'META:\n')

  if self._lines[ln].startswith('Time:'):
    # Try both recognized time formats; see the two class patterns.
    matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
    matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
    self._time = time.mktime(datetime.datetime.strptime(
        matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
    if matched_format.group(2):
      # Optional fractional part; added as milliseconds.
      self._time += float(matched_format.group(2)[1:]) / 1000.0
  elif matched_seconds:
    self._time = float(matched_seconds.group(1))
  elif self._lines[ln].startswith('Reason:'):
    pass  # Nothing to do for 'Reason:'
  elif self._lines[ln].startswith('PageSize: '):
    self._pagesize = int(self._lines[ln][10:])
  elif self._lines[ln].startswith('CommandLine:'):
  elif (self._lines[ln].startswith('PageFrame: ') or
        self._lines[ln].startswith('PFN: ')):
    # Both spellings carry a comma-separated option list.
    if self._lines[ln].startswith('PageFrame: '):
      words = self._lines[ln][11:].split(',')
    words = self._lines[ln][5:].split(',')
    self._pageframe_length = 24
  elif word == 'Base64':
    self._pageframe_encoding = 'base64'
  elif word == 'PageCount':
    self._has_pagecount = True
  elif self._lines[ln].startswith('RunID: '):
    self._run_id = self._lines[ln][7:].strip()
  elif (self._lines[ln].startswith('MMAP_LIST:') or
        self._lines[ln].startswith('GLOBAL_STATS:')):
    # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found.
def _parse_mmap_list(self):
  """Parses lines in self._lines as a mmap list."""
  # NOTE(review): loop headers, "if not found"/"else:" branches and line
  # advances are elided from this chunk; indentation below is approximate.
  (ln, found) = skip_while(
      lambda n: self._lines[n] != 'MMAP_LIST:\n')

  # Parse a /proc/maps-formatted line and merge its fields into the
  # overlapping ranges of self._procmaps.
  entry = procfs.ProcMaps.parse_line(self._lines[ln])
  for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
    for key, value in entry.as_dict().iteritems():
      current_vma[key] = value

  # Collect pageframes attached to the region ("PF:" continuation lines).
  if self._lines[ln].startswith(' PF: '):
    for pageframe in self._lines[ln][5:].split():
      pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))

  matched = self._HOOK_PATTERN.match(self._lines[ln])
  # 2: starting address
  # 7: hooked or unhooked
  # 8: additional information
  if matched.group(7) == 'hooked':
    submatched = self._HOOKED_PATTERN.match(matched.group(8))
    # NOTE(review): the fallback guard (presumably "if not submatched:")
    # before the old-format match is elided from this chunk.
    submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
  elif matched.group(7) == 'unhooked':
    submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
    submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
  assert matched.group(7) in ['hooked', 'unhooked']

  submatched_dict = submatched.groupdict()
  region_info = { 'vma': current_vma }
  # Copy over only the additional-info fields that actually matched.
  if submatched_dict.get('TYPE'):
    region_info['type'] = submatched_dict['TYPE'].strip()
  if submatched_dict.get('COMMITTED'):
    region_info['committed'] = int(submatched_dict['COMMITTED'])
  if submatched_dict.get('RESERVED'):
    region_info['reserved'] = int(submatched_dict['RESERVED'])
  if submatched_dict.get('BUCKETID'):
    region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

  # A parenthesized bound takes the address from the current VMA instead
  # of the hex field.  (The "else:" lines are elided from this chunk.)
  if matched.group(1) == '(':
    start = current_vma['begin']
  start = int(matched.group(2), 16)
  if matched.group(4) == '(':
    end = current_vma['end']
  end = int(matched.group(5), 16)

  # Trim partial first/last pageframes down to the region boundaries.
  if pageframe_list and pageframe_list[0].start_truncated:
    pageframe_list[0].set_size(
        pageframe_list[0].size - start % self._pagesize)
  if pageframe_list and pageframe_list[-1].end_truncated:
    pageframe_list[-1].set_size(
        pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
  region_info['pageframe'] = pageframe_list

  self._map[(start, end)] = (matched.group(7), region_info)
def _extract_stacktrace_lines(self, line_number):
  """Extracts the position of stacktrace lines.

  Valid stacktrace lines are stored into self._stacktrace_lines.

  Args:
      line_number: A line number to start parsing in lines.

  Raises:
      ParsingException for invalid dump versions.
  """
  # NOTE(review): the "else:" before the final raise is elided from this
  # chunk.
  if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
    # Skip to the first line whose first word is numeric ...
    (line_number, _) = skip_while(
        line_number, len(self._lines),
        lambda n: not self._lines[n].split()[0].isdigit())
    stacktrace_start = line_number
    # ... then take lines while they pass _check_stacktrace_line.
    (line_number, _) = skip_while(
        line_number, len(self._lines),
        lambda n: self._check_stacktrace_line(self._lines[n]))
    self._stacktrace_lines = self._lines[stacktrace_start:line_number]
  elif self._version in DUMP_DEEP_OBSOLETE:
    raise ObsoleteDumpVersionException(self._version)
  raise InvalidDumpException('Invalid version: %s' % self._version)
def _check_stacktrace_line(stacktrace_line):
  """Checks if a given stacktrace_line is valid as stacktrace.

  Args:
      stacktrace_line: A string to be checked.

  Returns:
      True if the given stacktrace_line is valid.
  """
  # NOTE(review): likely a @staticmethod; its decorator and the return
  # statements under the guards below are elided from this chunk.
  words = stacktrace_line.split()
  if len(words) < BUCKET_ID + 1:
  if words[BUCKET_ID - 1] != '@':
class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  # NOTE(review): the ProcMapsEntry(...) argument list and several method
  # "def" lines (__init__, __str__, __repr__, the copy method, and the
  # consequence of the guard in __setitem__ -- presumably a raise) are
  # elided from this chunk.
  _DUMMY_ENTRY = procfs.ProcMapsEntry(

    # __init__ body: start from a copy of the dummy entry's fields.
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

    # __str__ body.
    return str(self._entry)

    # __repr__ body.
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    # Only keys already present in the entry dict are expected here.
    if key not in self._entry:
    self._entry[key] = value

    # copy-method body: deep-copy every field into a fresh attribute.
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) is False.

  Returns:
      A pair of an integer indicating a line number after skipped, and a
      boolean value which is True if found a line which skipping_condition
      does not hold for.
  """
  # NOTE(review): the loop body (index advance and the bailout return) and
  # the final return continue past the end of this chunk.
  while skipping_condition(index):
    if index >= max_index: