# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
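
"""Policy-driven subcommands of dmprof: 'csv', 'json' and 'list'.

These subcommands load heap dumps, apply each policy in a policy set to
attribute allocations to named components, and report the per-component
sizes as CSV, JSON, or a plain-text listing.
"""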

import datetime
import json
import logging
import sys

from lib.pageframe import PFNCounts
from lib.policy import PolicySet
from lib.subcommand import SubCommand


LOGGER = logging.getLogger('dmprof')


class PolicyCommands(SubCommand):
  def __init__(self, command):
    super(PolicyCommands, self).__init__(
        'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' %
        command)
    self._parser.add_option('-p', '--policy', type='string', dest='policy',
                            help='profile with POLICY', metavar='POLICY')
    self._parser.add_option('--alternative-dirs', dest='alternative_dirs',
                            metavar='/path/on/target@/path/on/host[:...]',
                            help='Read files in /path/on/host/ instead of '
                                 'files in /path/on/target/.')
    self._parser.add_option('--timestamp', dest='timestamp',
                            action='store_true', help='Use timestamp.')
    self._timestamp = False
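
  # With the 'csv' subcommand, for instance, the parser built above accepts:
  #   %prog csv [-p POLICY] <first-dump> [shared-first-dumps...]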

  def _set_up(self, sys_argv):
    options, args = self._parse_args(sys_argv, 1)
    dump_path = args[1]
    shared_first_dump_paths = args[2:]
    alternative_dirs_dict = {}
    if options.alternative_dirs:
      for alternative_dir_pair in options.alternative_dirs.split(':'):
        target_path, host_path = alternative_dir_pair.split('@', 1)
        alternative_dirs_dict[target_path] = host_path
    (bucket_set, dumps) = SubCommand.load_basic_files(
        dump_path, True, alternative_dirs=alternative_dirs_dict)

    self._timestamp = options.timestamp
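
    # Group PFN (page frame number) counts from the shared dumps by pid so
    # _apply_policy can later pick, per process, the snapshot closest in time.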
    pfn_counts_dict = {}
    for shared_first_dump_path in shared_first_dump_paths:
      shared_dumps = SubCommand._find_all_dumps(shared_first_dump_path)
      for shared_dump in shared_dumps:
        pfn_counts = PFNCounts.load(shared_dump)
        if pfn_counts.pid not in pfn_counts_dict:
          pfn_counts_dict[pfn_counts.pid] = []
        pfn_counts_dict[pfn_counts.pid].append(pfn_counts)

    policy_set = PolicySet.load(SubCommand._parse_policy_list(options.policy))
    return policy_set, dumps, pfn_counts_dict, bucket_set

  def _apply_policy(self, dump, pfn_counts_dict, policy, bucket_set,
                    first_dump_time):
    """Aggregates the total memory size of each component.

    Iterates through all stacktraces and attributes each of them to one of
    the components based on the policy.  It is important to apply the policy
    rules in the right order.

    Args:
        dump: A Dump object.
        pfn_counts_dict: A dict mapping a pid to a list of PFNCounts.
        policy: A Policy object.
        bucket_set: A BucketSet object.
        first_dump_time: An integer representing the time when the first
            dump was taken.

    Returns:
        A dict mapping components to their corresponding sizes.
    """
    LOGGER.info('  %s' % dump.path)

    all_pfn_dict = {}
    if pfn_counts_dict:
      LOGGER.info('    shared with...')
      for pid, pfnset_list in pfn_counts_dict.iteritems():
        closest_pfnset_index = None
        closest_pfnset_difference = 1024.0
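        # Choose the pfnset closest in time to this dump: earlier pfnsets are
        # acceptable unless the process was exiting, later ones only within a
        # 3-second window.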
        for index, pfnset in enumerate(pfnset_list):
          time_difference = pfnset.time - dump.time
          if time_difference >= 3.0:
            break
          elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or
                (0.0 <= time_difference and time_difference < 3.0)):
            closest_pfnset_index = index
            closest_pfnset_difference = time_difference
          elif time_difference < 0.0 and pfnset.reason == 'Exiting':
            closest_pfnset_index = None
            break
        if closest_pfnset_index is not None:
          for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn:
            all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count
          LOGGER.info('      %s (time difference = %f)' %
                      (pfnset_list[closest_pfnset_index].path,
                       closest_pfnset_difference))
        else:
          LOGGER.info('      (no match with pid:%d)' % pid)

    sizes = dict((c, 0) for c in policy.components)
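
    # Attribute malloc'ed blocks first, then mapped regions; the running
    # '*-total-log' subtotals they accumulate feed the derived entries below.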
    PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes)
    verify_global_stats = PolicyCommands._accumulate_maps(
        dump, all_pfn_dict, policy, bucket_set, sizes)

    # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed.
    # http://crbug.com/245603.
    for verify_key, verify_value in verify_global_stats.iteritems():
      dump_value = dump.global_stat('%s_committed' % verify_key)
      if dump_value != verify_value:
        LOGGER.warn('%25s: %12d != %d (%d)' % (
            verify_key, dump_value, verify_value, dump_value - verify_value))
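
    # Derived entries: '-no-log' is memory seen in the global stats but not
    # in the profiled logs; '-total-record' comes straight from global stats.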
    sizes['mmap-no-log'] = (
        dump.global_stat('profiled-mmap_committed') -
        sizes['mmap-total-log'])
    sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
    sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')

    sizes['tc-no-log'] = (
        dump.global_stat('profiled-malloc_committed') -
        sizes['tc-total-log'])
    sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
    sizes['tc-unused'] = (
        sizes['mmap-tcmalloc'] -
        dump.global_stat('profiled-malloc_committed'))
    if sizes['tc-unused'] < 0:
      LOGGER.warn('    Assuming tc-unused=0 as it is negative: %d (bytes)' %
                  sizes['tc-unused'])
      sizes['tc-unused'] = 0
    sizes['tc-total'] = sizes['mmap-tcmalloc']

    # TODO(dmikurube): global_stat will be deprecated.
    # See http://crbug.com/245603.
    for key, value in {
        'total': 'total_committed',
        'filemapped': 'file_committed',
        'absent': 'absent_committed',
        'file-exec': 'file-exec_committed',
        'file-nonexec': 'file-nonexec_committed',
        'anonymous': 'anonymous_committed',
        'stack': 'stack_committed',
        'other': 'other_committed',
        'unhooked-absent': 'nonprofiled-absent_committed',
        'total-vm': 'total_virtual',
        'filemapped-vm': 'file_virtual',
        'anonymous-vm': 'anonymous_virtual',
        'other-vm': 'other_virtual'}.iteritems():
      if key in sizes:
        sizes[key] = dump.global_stat(value)

    if 'mustbezero' in sizes:
      removed_list = (
          'profiled-mmap_committed',
          'nonprofiled-absent_committed',
          'nonprofiled-anonymous_committed',
          'nonprofiled-file-exec_committed',
          'nonprofiled-file-nonexec_committed',
          'nonprofiled-stack_committed',
          'nonprofiled-other_committed')
      sizes['mustbezero'] = (
          dump.global_stat('total_committed') -
          sum(dump.global_stat(removed) for removed in removed_list))
    if 'total-exclude-profiler' in sizes:
      sizes['total-exclude-profiler'] = (
          dump.global_stat('total_committed') -
          (sizes['mmap-profiler'] + sizes['mmap-type-profiler']))
    if 'hour' in sizes:
      sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
    if 'minute' in sizes:
      sizes['minute'] = (dump.time - first_dump_time) / 60.0
    if 'second' in sizes:
      if self._timestamp:
        sizes['second'] = datetime.datetime.fromtimestamp(
            dump.time).isoformat()
      else:
        sizes['second'] = dump.time - first_dump_time

    return sizes
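
  # _accumulate_malloc attributes malloc-backed stacktrace entries to policy
  # components; mmap-backed buckets are skipped here and are handled by
  # _accumulate_maps instead.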
  @staticmethod
  def _accumulate_malloc(dump, policy, bucket_set, sizes):
    for bucket_id, _, committed, _, _ in dump.iter_stacktrace:
      bucket = bucket_set.get(bucket_id)
      if not bucket or bucket.allocator_type == 'malloc':
        component_match = policy.find_malloc(bucket)
      elif bucket.allocator_type == 'mmap':
        continue
      else:
        assert False
      sizes[component_match] += committed

      assert not component_match.startswith('mmap-')
      if component_match.startswith('tc-'):
        sizes['tc-total-log'] += committed
      else:
        sizes['other-total-log'] += committed

  @staticmethod
  def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes):
    # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed.
    # http://crbug.com/245603.
    global_stats = {
        'total': 0,
        'file-exec': 0,
        'file-nonexec': 0,
        'anonymous': 0,
        'stack': 0,
        'other': 0,
        'nonprofiled-file-exec': 0,
        'nonprofiled-file-nonexec': 0,
        'nonprofiled-anonymous': 0,
        'nonprofiled-stack': 0,
        'nonprofiled-other': 0,
        'profiled-mmap': 0,
        }

    for key, value in dump.iter_map:
      # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed.
      # It's temporary verification code for transition described in
      # http://crbug.com/245603.
      committed = 0
      if 'committed' in value[1]:
        committed = value[1]['committed']
      global_stats['total'] += committed
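      # Classify the region by its VMA name: names starting with '/' are
      # file-backed (exec or non-exec), '[stack]' is the stack, and an empty
      # name means an anonymous mapping.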
      key = 'other'
      name = value[1]['vma']['name']
      if name.startswith('/'):
        if value[1]['vma']['executable'] == 'x':
          key = 'file-exec'
        else:
          key = 'file-nonexec'
      elif name == '[stack]':
        key = 'stack'
      elif name == '':
        key = 'anonymous'
      global_stats[key] += committed
      if value[0] == 'unhooked':
        global_stats['nonprofiled-' + key] += committed
      if value[0] == 'hooked':
        global_stats['profiled-mmap'] += committed

      if value[0] == 'unhooked':
        if pfn_dict and dump.pageframe_length:
          for pageframe in value[1]['pageframe']:
            component_match = policy.find_unhooked(value, pageframe, pfn_dict)
            sizes[component_match] += pageframe.size
        else:
          component_match = policy.find_unhooked(value)
          sizes[component_match] += int(value[1]['committed'])
      elif value[0] == 'hooked':
        if pfn_dict and dump.pageframe_length:
          for pageframe in value[1]['pageframe']:
            component_match, _ = policy.find_mmap(
                value, bucket_set, pageframe, pfn_dict)
            sizes[component_match] += pageframe.size
            assert not component_match.startswith('tc-')
            if component_match.startswith('mmap-'):
              sizes['mmap-total-log'] += pageframe.size
            else:
              sizes['other-total-log'] += pageframe.size
        else:
          component_match, _ = policy.find_mmap(value, bucket_set)
          sizes[component_match] += int(value[1]['committed'])
          if component_match.startswith('mmap-'):
            sizes['mmap-total-log'] += int(value[1]['committed'])
          else:
            sizes['other-total-log'] += int(value[1]['committed'])
      else:
        LOGGER.error('Unrecognized mapping status: %s' % value[0])

    return global_stats


class CSVCommand(PolicyCommands):
  def __init__(self):
    super(CSVCommand, self).__init__('csv')

  def do(self, sys_argv):
    policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
    return self._output(
        policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)
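
  # Writes one header row of component names per policy, then one CSV row per
  # dump; short rows are padded with commas up to the widest policy.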
  def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
    max_components = 0
    for label in policy_set:
      max_components = max(max_components, len(policy_set[label].components))

    for label in sorted(policy_set):
      components = policy_set[label].components
      if len(policy_set) > 1:
        out.write('%s%s\n' % (label, ',' * (max_components - 1)))
      out.write('%s%s\n' % (
          ','.join(components), ',' * (max_components - len(components))))

      LOGGER.info('Applying a policy %s to...' % label)
      for index, dump in enumerate(dumps):
        if index == 0:
          first_dump_time = dump.time
        component_sizes = self._apply_policy(
            dump, pfn_counts_dict, policy_set[label], bucket_set,
            first_dump_time)
        s = []
        for c in components:
          if c in ('hour', 'minute', 'second'):
            if isinstance(component_sizes[c], str):
              s.append('%s' % component_sizes[c])
            else:
              s.append('%05.5f' % (component_sizes[c]))
          else:
            s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
        out.write('%s%s\n' % (
            ','.join(s), ',' * (max_components - len(components))))

      bucket_set.clear_component_cache()

    return 0


class JSONCommand(PolicyCommands):
  def __init__(self):
    super(JSONCommand, self).__init__('json')

  def do(self, sys_argv):
    policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
    return self._output(
        policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)
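
  # Emits a JSON_DEEP_2 document: a 'policies' dict keyed by policy label,
  # each with the component 'legends' and one snapshot per dump.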
  def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
    json_base = {
        'version': 'JSON_DEEP_2',
        'policies': {},
        }

    for label in sorted(policy_set):
      json_base['policies'][label] = {
          'legends': policy_set[label].components,
          'snapshots': [],
          }

      LOGGER.info('Applying a policy %s to...' % label)
      for index, dump in enumerate(dumps):
        if index == 0:
          first_dump_time = dump.time
        component_sizes = self._apply_policy(
            dump, pfn_counts_dict, policy_set[label], bucket_set,
            first_dump_time)
        component_sizes['dump_path'] = dump.path
        component_sizes['dump_time'] = datetime.datetime.fromtimestamp(
            dump.time).strftime('%Y-%m-%d %H:%M:%S')
        json_base['policies'][label]['snapshots'].append(component_sizes)

      bucket_set.clear_component_cache()

    json.dump(json_base, out, indent=2, sort_keys=True)

    return 0


class ListCommand(PolicyCommands):
  def __init__(self):
    super(ListCommand, self).__init__('list')

  def do(self, sys_argv):
    policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
    return self._output(
        policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)
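
  # Prints a human-readable block per dump and policy: one right-aligned
  # 'component size' line for each component.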
  def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
    for label in sorted(policy_set):
      LOGGER.info('Applying a policy %s to...' % label)
      for dump in dumps:
        component_sizes = self._apply_policy(
            dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time)
        out.write('%s for %s:\n' % (label, dump.path))
        for c in policy_set[label].components:
          if c in ['hour', 'minute', 'second']:
            out.write('%40s %12.3f\n' % (c, component_sizes[c]))
          else:
            out.write('%40s %12d\n' % (c, component_sizes[c]))

      bucket_set.clear_component_cache()

    return 0