tools/deep_memory_profiler/dmprof.py
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """The deep heap profiler script for Chrome."""
7 import copy
8 import datetime
9 import json
10 import logging
11 import optparse
12 import os
13 import re
14 import subprocess
15 import sys
16 import tempfile
17 import time
18 import zipfile
20 from range_dict import ExclusiveRangeDict
22 BASE_PATH = os.path.dirname(os.path.abspath(__file__))
23 FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
24 BASE_PATH, os.pardir, 'find_runtime_symbols')
25 sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)
27 import find_runtime_symbols
28 import prepare_symbol_info
29 import proc_maps
31 from find_runtime_symbols import FUNCTION_SYMBOLS
32 from find_runtime_symbols import SOURCEFILE_SYMBOLS
33 from find_runtime_symbols import TYPEINFO_SYMBOLS
35 BUCKET_ID = 5
36 VIRTUAL = 0
37 COMMITTED = 1
38 ALLOC_COUNT = 2
39 FREE_COUNT = 3
40 NULL_REGEX = re.compile('')
42 LOGGER = logging.getLogger('dmprof')
43 POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')
# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2 and later distinguish mmap regions and malloc chunks.
# DUMP_DEEP_3 and later don't include allocation functions in their stack dumps.
# DUMP_DEEP_4 and later support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
54 DUMP_DEEP_1 = 'DUMP_DEEP_1'
55 DUMP_DEEP_2 = 'DUMP_DEEP_2'
56 DUMP_DEEP_3 = 'DUMP_DEEP_3'
57 DUMP_DEEP_4 = 'DUMP_DEEP_4'
59 DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
61 # DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
62 # malloc and mmap are identified in bucket files.
63 # DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
64 DUMP_DEEP_5 = 'DUMP_DEEP_5'
66 # DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
67 DUMP_DEEP_6 = 'DUMP_DEEP_6'
69 # Heap Profile Policy versions
# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished with mmap frames in the pattern column.
73 POLICY_DEEP_1 = 'POLICY_DEEP_1'
# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished with the allocation_type column.
77 POLICY_DEEP_2 = 'POLICY_DEEP_2'
79 # POLICY_DEEP_3 is in JSON format.
80 POLICY_DEEP_3 = 'POLICY_DEEP_3'
# POLICY_DEEP_4 contains typeinfo.
83 POLICY_DEEP_4 = 'POLICY_DEEP_4'
86 class EmptyDumpException(Exception):
87 def __init__(self, value=''):
88 super(EmptyDumpException, self).__init__()
89 self.value = value
90 def __str__(self):
91 return repr(self.value)
94 class ParsingException(Exception):
95 def __init__(self, value=''):
96 super(ParsingException, self).__init__()
97 self.value = value
98 def __str__(self):
99 return repr(self.value)
102 class InvalidDumpException(ParsingException):
103 def __init__(self, value):
104 super(InvalidDumpException, self).__init__()
105 self.value = value
106 def __str__(self):
107 return "invalid heap profile dump: %s" % repr(self.value)
110 class ObsoleteDumpVersionException(ParsingException):
111 def __init__(self, value):
112 super(ObsoleteDumpVersionException, self).__init__()
113 self.value = value
114 def __str__(self):
115 return "obsolete heap profile dump version: %s" % repr(self.value)
118 class ListAttribute(ExclusiveRangeDict.RangeAttribute):
119 """Represents a list for an attribute in range_dict.ExclusiveRangeDict."""
120 def __init__(self):
121 super(ListAttribute, self).__init__()
122 self._list = []
124 def __str__(self):
125 return str(self._list)
127 def __repr__(self):
128 return 'ListAttribute' + str(self._list)
130 def __len__(self):
131 return len(self._list)
133 def __iter__(self):
134 for x in self._list:
135 yield x
137 def __getitem__(self, index):
138 return self._list[index]
140 def __setitem__(self, index, value):
141 if index >= len(self._list):
142 self._list.extend([None] * (index + 1 - len(self._list)))
143 self._list[index] = value
145 def copy(self):
146 new_list = ListAttribute()
147 for index, item in enumerate(self._list):
148 new_list[index] = copy.deepcopy(item)
149 return new_list
152 class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
153 """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
154 _DUMMY_ENTRY = proc_maps.ProcMapsEntry(
155 0, # begin
156 0, # end
157 '-', # readable
158 '-', # writable
159 '-', # executable
160 '-', # private
161 0, # offset
162 '00', # major
163 '00', # minor
164 0, # inode
'' # name
)
168 def __init__(self):
169 super(ProcMapsEntryAttribute, self).__init__()
170 self._entry = self._DUMMY_ENTRY.as_dict()
172 def __str__(self):
173 return str(self._entry)
175 def __repr__(self):
176 return 'ProcMapsEntryAttribute' + str(self._entry)
178 def __getitem__(self, key):
179 return self._entry[key]
181 def __setitem__(self, key, value):
182 if key not in self._entry:
183 raise KeyError(key)
184 self._entry[key] = value
186 def copy(self):
187 new_entry = ProcMapsEntryAttribute()
188 for key, value in self._entry.iteritems():
189 new_entry[key] = copy.deepcopy(value)
190 return new_entry
193 def skip_while(index, max_index, skipping_condition):
194 """Increments |index| until |skipping_condition|(|index|) is False.
196 Returns:
197 A pair of an integer indicating a line number after skipped, and a
198 boolean value which is True if found a line which skipping_condition
199 is False for.
201 while skipping_condition(index):
202 index += 1
203 if index >= max_index:
204 return index, False
205 return index, True
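# Illustrative sketch (not part of the original tool) of how skip_while() is
# meant to be used; the sample 'lines' list below is hypothetical.
#   lines = ['# comment', '# comment', 'GLOBAL_STATS:\n']
#   index, found = skip_while(
#       0, len(lines), lambda n: lines[n] != 'GLOBAL_STATS:\n')
#   # => index == 2, found == True.  If no such line existed, found would be
#   #    False and index would equal len(lines).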
208 class SymbolDataSources(object):
209 """Manages symbol data sources in a process.
211 The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and
212 so on. They are collected into a directory '|prefix|.symmap' from the binary
213 files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py.
Binaries themselves are not required for profiling; the prepared data
sources work in place of a binary even if the binary has been overwritten
with another binary.

Note that loading the symbol data sources takes a long time since they are
often very big. Therefore, the 'dmprof' profiler uses 'SymbolMappingCache',
which caches the symbols that are actually used.
"""
223 def __init__(self, prefix, fake_directories=None):
224 self._prefix = prefix
225 self._prepared_symbol_data_sources_path = None
226 self._loaded_symbol_data_sources = None
227 self._fake_directories = fake_directories or {}
229 def prepare(self):
230 """Prepares symbol data sources by extracting mapping from a binary.
232 The prepared symbol data sources are stored in a directory. The directory
233 name is stored in |self._prepared_symbol_data_sources_path|.
235 Returns:
236 True if succeeded.
238 LOGGER.info('Preparing symbol mapping...')
239 self._prepared_symbol_data_sources_path, used_tempdir = (
240 prepare_symbol_info.prepare_symbol_info(
241 self._prefix + '.maps',
242 output_dir_path=self._prefix + '.symmap',
243 fake_directories=self._fake_directories,
244 use_tempdir=True,
245 use_source_file_name=True))
246 if self._prepared_symbol_data_sources_path:
247 LOGGER.info(' Prepared symbol mapping.')
248 if used_tempdir:
249 LOGGER.warn(' Using a temporary directory for symbol mapping.')
250 LOGGER.warn(' Delete it by yourself.')
251 LOGGER.warn(' Or, move the directory by yourself to use it later.')
252 return True
253 else:
254 LOGGER.warn(' Failed to prepare symbol mapping.')
255 return False
257 def get(self):
258 """Returns the prepared symbol data sources.
260 Returns:
261 The prepared symbol data sources. None if failed.
263 if not self._prepared_symbol_data_sources_path and not self.prepare():
264 return None
265 if not self._loaded_symbol_data_sources:
266 LOGGER.info('Loading symbol mapping...')
267 self._loaded_symbol_data_sources = (
268 find_runtime_symbols.RuntimeSymbolsInProcess.load(
269 self._prepared_symbol_data_sources_path))
270 return self._loaded_symbol_data_sources
272 def path(self):
273 """Returns the path of the prepared symbol data sources if possible."""
274 if not self._prepared_symbol_data_sources_path and not self.prepare():
275 return None
276 return self._prepared_symbol_data_sources_path
279 class SymbolFinder(object):
280 """Finds corresponding symbols from addresses.
This class only 'find()'s symbols for a specified |address_list|.
It is introduced to make the finder mockable.
"""
285 def __init__(self, symbol_type, symbol_data_sources):
286 self._symbol_type = symbol_type
287 self._symbol_data_sources = symbol_data_sources
289 def find(self, address_list):
290 return find_runtime_symbols.find_runtime_symbols(
291 self._symbol_type, self._symbol_data_sources.get(), address_list)
294 class SymbolMappingCache(object):
295 """Caches mapping from actually used addresses to symbols.
297 'update()' updates the cache from the original symbol data sources via
'SymbolFinder'. Symbols can be looked up by the method 'lookup()'.
"""
300 def __init__(self):
301 self._symbol_mapping_caches = {
302 FUNCTION_SYMBOLS: {},
303 SOURCEFILE_SYMBOLS: {},
TYPEINFO_SYMBOLS: {},
}
307 def update(self, symbol_type, bucket_set, symbol_finder, cache_f):
308 """Updates symbol mapping cache on memory and in a symbol cache file.
310 It reads cached symbol mapping from a symbol cache file |cache_f| if it
311 exists. Unresolved addresses are then resolved and added to the cache
312 both on memory and in the symbol cache file with using 'SymbolFinder'.
314 A cache file is formatted as follows:
315 <Address> <Symbol>
316 <Address> <Symbol>
317 <Address> <Symbol>
320 Args:
321 symbol_type: A type of symbols to update. It should be one of
322 FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
323 bucket_set: A BucketSet object.
324 symbol_finder: A SymbolFinder object to find symbols.
cache_f: A readable and writable IO object of the symbol cache file.
"""
327 cache_f.seek(0, os.SEEK_SET)
328 self._load(cache_f, symbol_type)
330 unresolved_addresses = sorted(
331 address for address in bucket_set.iter_addresses(symbol_type)
332 if address not in self._symbol_mapping_caches[symbol_type])
334 if not unresolved_addresses:
335 LOGGER.info('No need to resolve any more addresses.')
336 return
338 cache_f.seek(0, os.SEEK_END)
339 LOGGER.info('Loading %d unresolved addresses.' %
340 len(unresolved_addresses))
341 symbol_dict = symbol_finder.find(unresolved_addresses)
343 for address, symbol in symbol_dict.iteritems():
344 stripped_symbol = symbol.strip() or '?'
345 self._symbol_mapping_caches[symbol_type][address] = stripped_symbol
346 cache_f.write('%x %s\n' % (address, stripped_symbol))
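# A hypothetical sketch of what a resulting cache file (e.g.
# '<prefix>.cache.function') contains after update(): one "<hex address>
# <symbol>" pair per line, as written by cache_f.write() above.  The symbols
# shown here are made-up examples.
#   7f3b2c001234 v8::internal::Heap::AllocateRaw
#   7f3b2c005678 WTF::fastMalloc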
348 def lookup(self, symbol_type, address):
349 """Looks up a symbol for a given |address|.
351 Args:
symbol_type: A type of symbols to look up. It should be one of
FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
354 address: An integer that represents an address.
356 Returns:
A string that represents a symbol, or None if not found.
"""
359 return self._symbol_mapping_caches[symbol_type].get(address)
361 def _load(self, cache_f, symbol_type):
362 try:
363 for line in cache_f:
364 items = line.rstrip().split(None, 1)
365 if len(items) == 1:
366 items.append('??')
367 self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1]
368 LOGGER.info('Loaded %d entries from symbol cache.' %
369 len(self._symbol_mapping_caches[symbol_type]))
370 except IOError as e:
371 LOGGER.info('The symbol cache file is invalid: %s' % e)
374 class Rule(object):
375 """Represents one matching rule in a policy file."""
377 def __init__(self,
378 name,
379 mmap,
380 stackfunction_pattern=None,
381 stacksourcefile_pattern=None,
382 typeinfo_pattern=None):
383 self._name = name
384 self._mmap = mmap
386 self._stackfunction_pattern = None
387 if stackfunction_pattern:
388 self._stackfunction_pattern = re.compile(
389 stackfunction_pattern + r'\Z')
391 self._stacksourcefile_pattern = None
392 if stacksourcefile_pattern:
393 self._stacksourcefile_pattern = re.compile(
394 stacksourcefile_pattern + r'\Z')
396 self._typeinfo_pattern = None
397 if typeinfo_pattern:
398 self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z')
400 @property
401 def name(self):
402 return self._name
404 @property
405 def mmap(self):
406 return self._mmap
408 @property
409 def stackfunction_pattern(self):
410 return self._stackfunction_pattern
412 @property
413 def stacksourcefile_pattern(self):
414 return self._stacksourcefile_pattern
416 @property
417 def typeinfo_pattern(self):
418 return self._typeinfo_pattern
421 class Policy(object):
422 """Represents a policy, a content of a policy file."""
424 def __init__(self, rules, version, components):
425 self._rules = rules
426 self._version = version
427 self._components = components
429 @property
430 def rules(self):
431 return self._rules
433 @property
434 def version(self):
435 return self._version
437 @property
438 def components(self):
439 return self._components
441 def find(self, bucket):
442 """Finds a matching component name which a given |bucket| belongs to.
444 Args:
445 bucket: A Bucket object to be searched for.
447 Returns:
448 A string representing a component name.
450 if not bucket:
451 return 'no-bucket'
452 if bucket.component_cache:
453 return bucket.component_cache
455 stackfunction = bucket.symbolized_joined_stackfunction
456 stacksourcefile = bucket.symbolized_joined_stacksourcefile
457 typeinfo = bucket.symbolized_typeinfo
458 if typeinfo.startswith('0x'):
459 typeinfo = bucket.typeinfo_name
461 for rule in self._rules:
462 if (bucket.mmap == rule.mmap and
463 (not rule.stackfunction_pattern or
464 rule.stackfunction_pattern.match(stackfunction)) and
465 (not rule.stacksourcefile_pattern or
466 rule.stacksourcefile_pattern.match(stacksourcefile)) and
467 (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))):
468 bucket.component_cache = rule.name
469 return rule.name
471 assert False
473 @staticmethod
474 def load(filename, filetype):
475 """Loads a policy file of |filename| in a |format|.
477 Args:
478 filename: A filename to be loaded.
479 filetype: A string to specify a type of the file. Only 'json' is
480 supported for now.
482 Returns:
483 A loaded Policy object.
485 with open(os.path.join(BASE_PATH, filename)) as policy_f:
486 return Policy.parse(policy_f, filetype)
488 @staticmethod
489 def parse(policy_f, filetype):
490 """Parses a policy file content in a |format|.
492 Args:
493 policy_f: An IO object to be loaded.
494 filetype: A string to specify a type of the file. Only 'json' is
495 supported for now.
497 Returns:
498 A loaded Policy object.
500 if filetype == 'json':
501 return Policy._parse_json(policy_f)
502 else:
503 return None
505 @staticmethod
506 def _parse_json(policy_f):
507 """Parses policy file in json format.
509 A policy file contains component's names and their stacktrace pattern
510 written in regular expression. Those patterns are matched against each
511 symbols of each stacktraces in the order written in the policy file
513 Args:
514 policy_f: A File/IO object to read.
516 Returns:
517 A loaded policy object.
519 policy = json.load(policy_f)
521 rules = []
522 for rule in policy['rules']:
523 stackfunction = rule.get('stackfunction') or rule.get('stacktrace')
524 stacksourcefile = rule.get('stacksourcefile')
525 rules.append(Rule(
526 rule['name'],
527 rule['allocator'] == 'mmap',
528 stackfunction,
529 stacksourcefile,
530 rule['typeinfo'] if 'typeinfo' in rule else None))
532 return Policy(rules, policy['version'], policy['components'])
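# A minimal policy JSON sketch based on the fields read by _parse_json()
# above.  The component names, rule patterns and version value are made-up
# examples, not a real Chromium policy file.
#   {
#     "version": "POLICY_DEEP_3",
#     "components": ["mmap-v8", "tc-std-string", "unknown"],
#     "rules": [
#       {"name": "mmap-v8", "allocator": "mmap",
#        "stackfunction": ".*v8::.*"},
#       {"name": "tc-std-string", "allocator": "malloc",
#        "stackfunction": ".*std::basic_string.*"},
#       {"name": "unknown", "allocator": "malloc", "stackfunction": ".*"}
#     ]
#   }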
535 class PolicySet(object):
536 """Represents a set of policies."""
538 def __init__(self, policy_directory):
539 self._policy_directory = policy_directory
541 @staticmethod
542 def load(labels=None):
543 """Loads a set of policies via the "default policy directory".
545 The "default policy directory" contains pairs of policies and their labels.
546 For example, a policy "policy.l0.json" is labeled "l0" in the default
547 policy directory "policies.json".
549 All policies in the directory are loaded by default. Policies can be
550 limited by |labels|.
552 Args:
553 labels: An array that contains policy labels to be loaded.
555 Returns:
A PolicySet object.
"""
558 default_policy_directory = PolicySet._load_default_policy_directory()
559 if labels:
560 specified_policy_directory = {}
561 for label in labels:
562 if label in default_policy_directory:
563 specified_policy_directory[label] = default_policy_directory[label]
564 # TODO(dmikurube): Load an un-labeled policy file.
565 return PolicySet._load_policies(specified_policy_directory)
566 else:
567 return PolicySet._load_policies(default_policy_directory)
569 def __len__(self):
570 return len(self._policy_directory)
572 def __iter__(self):
573 for label in self._policy_directory:
574 yield label
576 def __getitem__(self, label):
577 return self._policy_directory[label]
579 @staticmethod
580 def _load_default_policy_directory():
581 with open(POLICIES_JSON_PATH, mode='r') as policies_f:
582 default_policy_directory = json.load(policies_f)
583 return default_policy_directory
585 @staticmethod
586 def _load_policies(directory):
587 LOGGER.info('Loading policy files.')
588 policies = {}
589 for label in directory:
590 LOGGER.info(' %s: %s' % (label, directory[label]['file']))
591 loaded = Policy.load(directory[label]['file'], directory[label]['format'])
592 if loaded:
593 policies[label] = loaded
594 return PolicySet(policies)
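# A hypothetical sketch of policies.json, based on the 'file' and 'format'
# keys read in _load_policies() and the 'l0'/'policy.l0.json' example from
# PolicySet.load()'s docstring:
#   {
#     "l0": {"file": "policy.l0.json", "format": "json"}
#   }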
597 class Bucket(object):
598 """Represents a bucket, which is a unit of memory block classification."""
600 def __init__(self, stacktrace, mmap, typeinfo, typeinfo_name):
601 self._stacktrace = stacktrace
602 self._mmap = mmap
603 self._typeinfo = typeinfo
604 self._typeinfo_name = typeinfo_name
606 self._symbolized_stackfunction = stacktrace
607 self._symbolized_joined_stackfunction = ''
608 self._symbolized_stacksourcefile = stacktrace
609 self._symbolized_joined_stacksourcefile = ''
610 self._symbolized_typeinfo = typeinfo_name
612 self.component_cache = ''
614 def __str__(self):
615 result = []
616 result.append('mmap' if self._mmap else 'malloc')
617 if self._symbolized_typeinfo == 'no typeinfo':
618 result.append('tno_typeinfo')
619 else:
620 result.append('t' + self._symbolized_typeinfo)
621 result.append('n' + self._typeinfo_name)
622 result.extend(['%s(@%s)' % (function, sourcefile)
623 for function, sourcefile
624 in zip(self._symbolized_stackfunction,
625 self._symbolized_stacksourcefile)])
626 return ' '.join(result)
628 def symbolize(self, symbol_mapping_cache):
629 """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|.
631 Args:
symbol_mapping_cache: A SymbolMappingCache object.
"""
634 # TODO(dmikurube): Fill explicitly with numbers if symbol not found.
635 self._symbolized_stackfunction = [
636 symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address)
637 for address in self._stacktrace]
638 self._symbolized_joined_stackfunction = ' '.join(
639 self._symbolized_stackfunction)
640 self._symbolized_stacksourcefile = [
641 symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address)
642 for address in self._stacktrace]
643 self._symbolized_joined_stacksourcefile = ' '.join(
644 self._symbolized_stacksourcefile)
645 if not self._typeinfo:
646 self._symbolized_typeinfo = 'no typeinfo'
647 else:
648 self._symbolized_typeinfo = symbol_mapping_cache.lookup(
649 TYPEINFO_SYMBOLS, self._typeinfo)
650 if not self._symbolized_typeinfo:
651 self._symbolized_typeinfo = 'no typeinfo'
653 def clear_component_cache(self):
654 self.component_cache = ''
656 @property
657 def stacktrace(self):
658 return self._stacktrace
660 @property
661 def mmap(self):
662 return self._mmap
664 @property
665 def typeinfo(self):
666 return self._typeinfo
668 @property
669 def typeinfo_name(self):
670 return self._typeinfo_name
672 @property
673 def symbolized_stackfunction(self):
674 return self._symbolized_stackfunction
676 @property
677 def symbolized_joined_stackfunction(self):
678 return self._symbolized_joined_stackfunction
680 @property
681 def symbolized_stacksourcefile(self):
682 return self._symbolized_stacksourcefile
684 @property
685 def symbolized_joined_stacksourcefile(self):
686 return self._symbolized_joined_stacksourcefile
688 @property
689 def symbolized_typeinfo(self):
690 return self._symbolized_typeinfo
693 class BucketSet(object):
694 """Represents a set of bucket."""
695 def __init__(self):
696 self._buckets = {}
697 self._code_addresses = set()
698 self._typeinfo_addresses = set()
700 def load(self, prefix):
701 """Loads all related bucket files.
703 Args:
prefix: A prefix string for bucket file names.
"""
706 LOGGER.info('Loading bucket files.')
708 n = 0
709 while True:
710 path = '%s.%04d.buckets' % (prefix, n)
711 if not os.path.exists(path):
712 if n > 10:
713 break
714 n += 1
715 continue
716 LOGGER.info(' %s' % path)
717 with open(path, 'r') as f:
718 self._load_file(f)
719 n += 1
721 def _load_file(self, bucket_f):
722 for line in bucket_f:
723 words = line.split()
724 typeinfo = None
725 typeinfo_name = ''
726 stacktrace_begin = 2
727 for index, word in enumerate(words):
728 if index < 2:
729 continue
730 if word[0] == 't':
731 typeinfo = int(word[1:], 16)
732 self._typeinfo_addresses.add(typeinfo)
733 elif word[0] == 'n':
734 typeinfo_name = word[1:]
735 else:
736 stacktrace_begin = index
737 break
738 stacktrace = [int(address, 16) for address in words[stacktrace_begin:]]
739 for frame in stacktrace:
740 self._code_addresses.add(frame)
741 self._buckets[int(words[0])] = Bucket(
742 stacktrace, words[1] == 'mmap', typeinfo, typeinfo_name)
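# A sketch of one bucket file line as parsed by _load_file() above (all
# numbers and names are hypothetical):
#   <bucket id> <mmap|malloc> [t<typeinfo address>] [n<typeinfo name>] <stack addresses...>
#   e.g. "123 malloc t7f3b2c009abc nstd::string 7f3b2c001234 7f3b2c005678"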
744 def __iter__(self):
745 for bucket_id, bucket_content in self._buckets.iteritems():
746 yield bucket_id, bucket_content
748 def __getitem__(self, bucket_id):
749 return self._buckets[bucket_id]
751 def get(self, bucket_id):
752 return self._buckets.get(bucket_id)
754 def symbolize(self, symbol_mapping_cache):
755 for bucket_content in self._buckets.itervalues():
756 bucket_content.symbolize(symbol_mapping_cache)
758 def clear_component_cache(self):
759 for bucket_content in self._buckets.itervalues():
760 bucket_content.clear_component_cache()
762 def iter_addresses(self, symbol_type):
763 if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]:
764 for function in self._code_addresses:
765 yield function
766 else:
767 for function in self._typeinfo_addresses:
768 yield function
771 class Dump(object):
772 """Represents a heap profile dump."""
774 _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
776 _HOOK_PATTERN = re.compile(
777 r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
778 r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)
780 _TIME_PATTERN = re.compile(
781 r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
783 def __init__(self, path, modified_time):
784 self._path = path
785 matched = self._PATH_PATTERN.match(path)
786 self._pid = int(matched.group(2))
787 self._count = int(matched.group(3))
788 self._time = modified_time
789 self._map = {}
790 self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
791 self._stacktrace_lines = []
792 self._global_stats = {} # used only in apply_policy
794 self._version = ''
795 self._lines = []
797 @property
798 def path(self):
799 return self._path
801 @property
802 def count(self):
803 return self._count
805 @property
806 def time(self):
807 return self._time
809 @property
810 def iter_map(self):
811 for region in sorted(self._map.iteritems()):
812 yield region[0], region[1]
814 def iter_procmaps(self):
815 for begin, end, attr in self._map.iter_range():
816 yield begin, end, attr
818 @property
819 def iter_stacktrace(self):
820 for line in self._stacktrace_lines:
821 yield line
823 def global_stat(self, name):
824 return self._global_stats[name]
826 @staticmethod
827 def load(path, log_header='Loading a heap profile dump: '):
828 """Loads a heap profile dump.
830 Args:
831 path: A file path string to load.
832 log_header: A preceding string for log messages.
834 Returns:
835 A loaded Dump object.
837 Raises:
ParsingException for invalid heap profile dumps.
"""
840 dump = Dump(path, os.stat(path).st_mtime)
841 with open(path, 'r') as f:
842 dump.load_file(f, log_header)
843 return dump
845 def load_file(self, f, log_header):
846 self._lines = [line for line in f
847 if line and not line.startswith('#')]
849 try:
850 self._version, ln = self._parse_version()
851 self._parse_meta_information()
852 if self._version == DUMP_DEEP_6:
853 self._parse_mmap_list()
854 self._parse_global_stats()
855 self._extract_stacktrace_lines(ln)
856 except EmptyDumpException:
857 LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
858 except ParsingException, e:
859 LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
860 raise
861 else:
862 LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
864 def _parse_version(self):
865 """Parses a version string in self._lines.
867 Returns:
868 A pair of (a string representing a version of the stacktrace dump,
869 and an integer indicating a line number next to the version string).
871 Raises:
ParsingException for invalid dump versions.
"""
874 version = ''
876 # Skip until an identifiable line.
877 headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
878 if not self._lines:
879 raise EmptyDumpException('Empty heap dump file.')
880 (ln, found) = skip_while(
881 0, len(self._lines),
882 lambda n: not self._lines[n].startswith(headers))
883 if not found:
884 raise InvalidDumpException('No version header.')
886 # Identify a version.
887 if self._lines[ln].startswith('heap profile: '):
888 version = self._lines[ln][13:].strip()
889 if version in (DUMP_DEEP_5, DUMP_DEEP_6):
890 (ln, _) = skip_while(
891 ln, len(self._lines),
892 lambda n: self._lines[n] != 'STACKTRACES:\n')
893 elif version in DUMP_DEEP_OBSOLETE:
894 raise ObsoleteDumpVersionException(version)
895 else:
896 raise InvalidDumpException('Invalid version: %s' % version)
897 elif self._lines[ln] == 'STACKTRACES:\n':
898 raise ObsoleteDumpVersionException(DUMP_DEEP_1)
899 elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
900 raise ObsoleteDumpVersionException(DUMP_DEEP_2)
902 return (version, ln)
904 def _parse_global_stats(self):
905 """Parses lines in self._lines as global stats."""
906 (ln, _) = skip_while(
907 0, len(self._lines),
908 lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
910 global_stat_names = [
911 'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
912 'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
913 'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
914 'nonprofiled-stack', 'nonprofiled-other',
915 'profiled-mmap', 'profiled-malloc']
917 for prefix in global_stat_names:
918 (ln, _) = skip_while(
919 ln, len(self._lines),
920 lambda n: self._lines[n].split()[0] != prefix)
921 words = self._lines[ln].split()
922 self._global_stats[prefix + '_virtual'] = int(words[-2])
923 self._global_stats[prefix + '_committed'] = int(words[-1])
925 def _parse_meta_information(self):
926 """Parses lines in self._lines for meta information."""
927 (ln, found) = skip_while(
928 0, len(self._lines),
929 lambda n: self._lines[n] != 'META:\n')
930 if not found:
931 return
932 ln += 1
934 while True:
935 if self._lines[ln].startswith('Time:'):
936 matched = self._TIME_PATTERN.match(self._lines[ln])
937 if matched:
938 self._time = time.mktime(datetime.datetime.strptime(
939 matched.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
940 if matched.group(2):
941 self._time += float(matched.group(2)[1:]) / 1000.0
942 else:
943 break
944 ln += 1
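# A sketch of a META 'Time:' line accepted by _TIME_PATTERN (the timestamp
# itself is a made-up example):
#   Time: 2013/03/15 12:34:56.789
# The fractional part, if present, is added as milliseconds to the epoch time
# computed with strptime('%Y/%m/%d %H:%M:%S').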
946 def _parse_mmap_list(self):
947 """Parses lines in self._lines as a mmap list."""
948 (ln, found) = skip_while(
949 0, len(self._lines),
950 lambda n: self._lines[n] != 'MMAP_LIST:\n')
951 if not found:
952 return {}
954 ln += 1
955 self._map = {}
956 while True:
957 entry = proc_maps.ProcMaps.parse_line(self._lines[ln])
958 if entry:
959 for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
960 for key, value in entry.as_dict().iteritems():
961 attr[key] = value
962 ln += 1
963 continue
964 matched = self._HOOK_PATTERN.match(self._lines[ln])
965 if not matched:
966 break
967 # 2: starting address
968 # 5: end address
969 # 7: hooked or unhooked
970 # 8: additional information
971 self._map[(int(matched.group(2), 16),
972 int(matched.group(5), 16))] = (matched.group(7),
973 matched.group(8))
974 ln += 1
976 def _extract_stacktrace_lines(self, line_number):
977 """Extracts the position of stacktrace lines.
979 Valid stacktrace lines are stored into self._stacktrace_lines.
981 Args:
982 line_number: A line number to start parsing in lines.
984 Raises:
985 ParsingException for invalid dump versions.
987 if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
988 (line_number, _) = skip_while(
989 line_number, len(self._lines),
990 lambda n: not self._lines[n].split()[0].isdigit())
991 stacktrace_start = line_number
992 (line_number, _) = skip_while(
993 line_number, len(self._lines),
994 lambda n: self._check_stacktrace_line(self._lines[n]))
995 self._stacktrace_lines = self._lines[stacktrace_start:line_number]
997 elif self._version in DUMP_DEEP_OBSOLETE:
998 raise ObsoleteDumpVersionException(self._version)
1000 else:
1001 raise InvalidDumpException('Invalid version: %s' % self._version)
1003 @staticmethod
1004 def _check_stacktrace_line(stacktrace_line):
1005 """Checks if a given stacktrace_line is valid as stacktrace.
1007 Args:
1008 stacktrace_line: A string to be checked.
1010 Returns:
1011 True if the given stacktrace_line is valid.
1013 words = stacktrace_line.split()
1014 if len(words) < BUCKET_ID + 1:
1015 return False
1016 if words[BUCKET_ID - 1] != '@':
1017 return False
1018 return True
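# A sketch of a stacktrace line accepted by _check_stacktrace_line(), laid out
# according to the column constants VIRTUAL, COMMITTED, ALLOC_COUNT,
# FREE_COUNT and BUCKET_ID defined at the top of this file (all numbers are
# hypothetical):
#   <virtual> <committed> <alloc count> <free count> @ <bucket id>
#   1048576 524288 10 2 @ 123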
1021 class DumpList(object):
1022 """Represents a sequence of heap profile dumps."""
1024 def __init__(self, dump_list):
1025 self._dump_list = dump_list
1027 @staticmethod
1028 def load(path_list):
1029 LOGGER.info('Loading heap dump profiles.')
1030 dump_list = []
1031 for path in path_list:
1032 dump_list.append(Dump.load(path, ' '))
1033 return DumpList(dump_list)
1035 def __len__(self):
1036 return len(self._dump_list)
1038 def __iter__(self):
1039 for dump in self._dump_list:
1040 yield dump
1042 def __getitem__(self, index):
1043 return self._dump_list[index]
1046 class Command(object):
1047 """Subclasses are a subcommand for this executable.
1049 See COMMANDS in main().
1051 def __init__(self, usage):
1052 self._parser = optparse.OptionParser(usage)
1054 @staticmethod
1055 def load_basic_files(
1056 dump_path, multiple, no_dump=False, fake_directories=None):
1057 prefix = Command._find_prefix(dump_path)
1058 symbol_data_sources = SymbolDataSources(prefix, fake_directories or {})
1059 symbol_data_sources.prepare()
1060 bucket_set = BucketSet()
1061 bucket_set.load(prefix)
1062 if not no_dump:
1063 if multiple:
1064 dump_list = DumpList.load(Command._find_all_dumps(dump_path))
1065 else:
1066 dump = Dump.load(dump_path)
1067 symbol_mapping_cache = SymbolMappingCache()
1068 with open(prefix + '.cache.function', 'a+') as cache_f:
1069 symbol_mapping_cache.update(
1070 FUNCTION_SYMBOLS, bucket_set,
1071 SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f)
1072 with open(prefix + '.cache.typeinfo', 'a+') as cache_f:
1073 symbol_mapping_cache.update(
1074 TYPEINFO_SYMBOLS, bucket_set,
1075 SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f)
1076 with open(prefix + '.cache.sourcefile', 'a+') as cache_f:
1077 symbol_mapping_cache.update(
1078 SOURCEFILE_SYMBOLS, bucket_set,
1079 SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f)
1080 bucket_set.symbolize(symbol_mapping_cache)
1081 if no_dump:
1082 return bucket_set
1083 elif multiple:
1084 return (bucket_set, dump_list)
1085 else:
1086 return (bucket_set, dump)
1088 @staticmethod
1089 def _find_prefix(path):
return re.sub(r'\.[0-9][0-9][0-9][0-9]\.heap', '', path)
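# For example (hypothetical file name), _find_prefix() maps
# 'chrome.12345.0002.heap' to the prefix 'chrome.12345', which is then used to
# find the '.maps', '.NNNN.buckets' and '.cache.*' companion files.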
1092 @staticmethod
1093 def _find_all_dumps(dump_path):
1094 prefix = Command._find_prefix(dump_path)
1095 dump_path_list = [dump_path]
1097 n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])
1098 n += 1
1099 while True:
1100 p = '%s.%04d.heap' % (prefix, n)
1101 if os.path.exists(p):
1102 dump_path_list.append(p)
1103 else:
1104 break
1105 n += 1
1107 return dump_path_list
1109 @staticmethod
1110 def _find_all_buckets(dump_path):
1111 prefix = Command._find_prefix(dump_path)
1112 bucket_path_list = []
1114 n = 0
1115 while True:
1116 path = '%s.%04d.buckets' % (prefix, n)
1117 if not os.path.exists(path):
1118 if n > 10:
1119 break
1120 n += 1
1121 continue
1122 bucket_path_list.append(path)
1123 n += 1
1125 return bucket_path_list
1127 def _parse_args(self, sys_argv, required):
1128 options, args = self._parser.parse_args(sys_argv)
1129 if len(args) != required + 1:
1130 self._parser.error('needs %d argument(s).\n' % required)
1131 return None
1132 return (options, args)
1134 @staticmethod
1135 def _parse_policy_list(options_policy):
1136 if options_policy:
1137 return options_policy.split(',')
1138 else:
1139 return None
1142 class BucketsCommand(Command):
1143 def __init__(self):
1144 super(BucketsCommand, self).__init__('Usage: %prog buckets <first-dump>')
1146 def do(self, sys_argv, out=sys.stdout):
1147 _, args = self._parse_args(sys_argv, 1)
1148 dump_path = args[1]
1149 bucket_set = Command.load_basic_files(dump_path, True, True)
1151 BucketsCommand._output(bucket_set, out)
1152 return 0
1154 @staticmethod
1155 def _output(bucket_set, out):
1156 """Prints all buckets with resolving symbols.
1158 Args:
1159 bucket_set: A BucketSet object.
1160 out: An IO object to output.
1162 for bucket_id, bucket in sorted(bucket_set):
1163 out.write('%d: %s\n' % (bucket_id, bucket))
1166 class StacktraceCommand(Command):
1167 def __init__(self):
1168 super(StacktraceCommand, self).__init__(
1169 'Usage: %prog stacktrace <dump>')
1171 def do(self, sys_argv):
1172 _, args = self._parse_args(sys_argv, 1)
1173 dump_path = args[1]
1174 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
1176 StacktraceCommand._output(dump, bucket_set, sys.stdout)
1177 return 0
1179 @staticmethod
1180 def _output(dump, bucket_set, out):
1181 """Outputs a given stacktrace.
1183 Args:
1184 bucket_set: A BucketSet object.
1185 out: A file object to output.
1187 for line in dump.iter_stacktrace:
1188 words = line.split()
1189 bucket = bucket_set.get(int(words[BUCKET_ID]))
1190 if not bucket:
1191 continue
1192 for i in range(0, BUCKET_ID - 1):
1193 out.write(words[i] + ' ')
1194 for frame in bucket.symbolized_stackfunction:
1195 out.write(frame + ' ')
1196 out.write('\n')
1199 class PolicyCommands(Command):
1200 def __init__(self, command):
1201 super(PolicyCommands, self).__init__(
1202 'Usage: %%prog %s [-p POLICY] <first-dump>' % command)
1203 self._parser.add_option('-p', '--policy', type='string', dest='policy',
1204 help='profile with POLICY', metavar='POLICY')
1205 self._parser.add_option('--fake-directories', dest='fake_directories',
1206 metavar='/path/on/target@/path/on/host[:...]',
1207 help='Read files in /path/on/host/ instead of '
1208 'files in /path/on/target/.')
1210 def _set_up(self, sys_argv):
1211 options, args = self._parse_args(sys_argv, 1)
1212 dump_path = args[1]
1213 fake_directories_dict = {}
1214 if options.fake_directories:
1215 for fake_directory_pair in options.fake_directories.split(':'):
1216 target_path, host_path = fake_directory_pair.split('@', 1)
1217 fake_directories_dict[target_path] = host_path
1218 (bucket_set, dumps) = Command.load_basic_files(
1219 dump_path, True, fake_directories=fake_directories_dict)
1221 policy_set = PolicySet.load(Command._parse_policy_list(options.policy))
1222 return policy_set, dumps, bucket_set
1224 @staticmethod
1225 def _apply_policy(dump, policy, bucket_set, first_dump_time):
1226 """Aggregates the total memory size of each component.
1228 Iterate through all stacktraces and attribute them to one of the components
1229 based on the policy. It is important to apply policy in right order.
1231 Args:
1232 dump: A Dump object.
1233 policy: A Policy object.
1234 bucket_set: A BucketSet object.
1235 first_dump_time: An integer representing time when the first dump is
1236 dumped.
1238 Returns:
1239 A dict mapping components and their corresponding sizes.
1241 LOGGER.info(' %s' % dump.path)
1242 sizes = dict((c, 0) for c in policy.components)
1244 PolicyCommands._accumulate(dump, policy, bucket_set, sizes)
1246 sizes['mmap-no-log'] = (
1247 dump.global_stat('profiled-mmap_committed') -
1248 sizes['mmap-total-log'])
1249 sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
1250 sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')
1252 sizes['tc-no-log'] = (
1253 dump.global_stat('profiled-malloc_committed') -
1254 sizes['tc-total-log'])
1255 sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
1256 sizes['tc-unused'] = (
1257 sizes['mmap-tcmalloc'] -
1258 dump.global_stat('profiled-malloc_committed'))
1259 sizes['tc-total'] = sizes['mmap-tcmalloc']
1261 for key, value in {
1262 'total': 'total_committed',
1263 'filemapped': 'file_committed',
1264 'absent': 'absent_committed',
1265 'file-exec': 'file-exec_committed',
1266 'file-nonexec': 'file-nonexec_committed',
1267 'anonymous': 'anonymous_committed',
1268 'stack': 'stack_committed',
1269 'other': 'other_committed',
1270 'unhooked-absent': 'nonprofiled-absent_committed',
1271 'unhooked-anonymous': 'nonprofiled-anonymous_committed',
1272 'unhooked-file-exec': 'nonprofiled-file-exec_committed',
1273 'unhooked-file-nonexec': 'nonprofiled-file-nonexec_committed',
1274 'unhooked-stack': 'nonprofiled-stack_committed',
1275 'unhooked-other': 'nonprofiled-other_committed',
1276 'total-vm': 'total_virtual',
1277 'filemapped-vm': 'file_virtual',
1278 'anonymous-vm': 'anonymous_virtual',
1279 'other-vm': 'other_virtual' }.iteritems():
1280 if key in sizes:
1281 sizes[key] = dump.global_stat(value)
1283 if 'mustbezero' in sizes:
1284 removed_list = (
1285 'profiled-mmap_committed',
1286 'nonprofiled-absent_committed',
1287 'nonprofiled-anonymous_committed',
1288 'nonprofiled-file-exec_committed',
1289 'nonprofiled-file-nonexec_committed',
1290 'nonprofiled-stack_committed',
1291 'nonprofiled-other_committed')
1292 sizes['mustbezero'] = (
1293 dump.global_stat('total_committed') -
1294 sum(dump.global_stat(removed) for removed in removed_list))
1295 if 'total-exclude-profiler' in sizes:
1296 sizes['total-exclude-profiler'] = (
1297 dump.global_stat('total_committed') -
1298 (sizes['mmap-profiler'] + sizes['mmap-type-profiler']))
1299 if 'hour' in sizes:
1300 sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
1301 if 'minute' in sizes:
1302 sizes['minute'] = (dump.time - first_dump_time) / 60.0
1303 if 'second' in sizes:
1304 sizes['second'] = dump.time - first_dump_time
1306 return sizes
1308 @staticmethod
1309 def _accumulate(dump, policy, bucket_set, sizes):
1310 for line in dump.iter_stacktrace:
1311 words = line.split()
1312 bucket = bucket_set.get(int(words[BUCKET_ID]))
1313 component_match = policy.find(bucket)
1314 sizes[component_match] += int(words[COMMITTED])
1316 if component_match.startswith('tc-'):
1317 sizes['tc-total-log'] += int(words[COMMITTED])
1318 elif component_match.startswith('mmap-'):
1319 sizes['mmap-total-log'] += int(words[COMMITTED])
1320 else:
1321 sizes['other-total-log'] += int(words[COMMITTED])
1324 class CSVCommand(PolicyCommands):
1325 def __init__(self):
1326 super(CSVCommand, self).__init__('csv')
1328 def do(self, sys_argv):
1329 policy_set, dumps, bucket_set = self._set_up(sys_argv)
1330 return CSVCommand._output(policy_set, dumps, bucket_set, sys.stdout)
1332 @staticmethod
1333 def _output(policy_set, dumps, bucket_set, out):
1334 max_components = 0
1335 for label in policy_set:
1336 max_components = max(max_components, len(policy_set[label].components))
1338 for label in sorted(policy_set):
1339 components = policy_set[label].components
1340 if len(policy_set) > 1:
1341 out.write('%s%s\n' % (label, ',' * (max_components - 1)))
1342 out.write('%s%s\n' % (
1343 ','.join(components), ',' * (max_components - len(components))))
1345 LOGGER.info('Applying a policy %s to...' % label)
1346 for dump in dumps:
1347 component_sizes = PolicyCommands._apply_policy(
1348 dump, policy_set[label], bucket_set, dumps[0].time)
1349 s = []
1350 for c in components:
1351 if c in ('hour', 'minute', 'second'):
1352 s.append('%05.5f' % (component_sizes[c]))
1353 else:
1354 s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
1355 out.write('%s%s\n' % (
1356 ','.join(s), ',' * (max_components - len(components))))
1358 bucket_set.clear_component_cache()
1360 return 0
1363 class JSONCommand(PolicyCommands):
1364 def __init__(self):
1365 super(JSONCommand, self).__init__('json')
1367 def do(self, sys_argv):
1368 policy_set, dumps, bucket_set = self._set_up(sys_argv)
1369 return JSONCommand._output(policy_set, dumps, bucket_set, sys.stdout)
1371 @staticmethod
1372 def _output(policy_set, dumps, bucket_set, out):
1373 json_base = {
1374 'version': 'JSON_DEEP_2',
'policies': {},
}
1378 for label in sorted(policy_set):
1379 json_base['policies'][label] = {
1380 'legends': policy_set[label].components,
'snapshots': [],
}
1384 LOGGER.info('Applying a policy %s to...' % label)
1385 for dump in dumps:
1386 component_sizes = PolicyCommands._apply_policy(
1387 dump, policy_set[label], bucket_set, dumps[0].time)
1388 component_sizes['dump_path'] = dump.path
1389 component_sizes['dump_time'] = datetime.datetime.fromtimestamp(
1390 dump.time).strftime('%Y-%m-%d %H:%M:%S')
1391 json_base['policies'][label]['snapshots'].append(component_sizes)
1393 bucket_set.clear_component_cache()
1395 json.dump(json_base, out, indent=2, sort_keys=True)
1397 return 0
1400 class ListCommand(PolicyCommands):
1401 def __init__(self):
1402 super(ListCommand, self).__init__('list')
1404 def do(self, sys_argv):
1405 policy_set, dumps, bucket_set = self._set_up(sys_argv)
1406 return ListCommand._output(policy_set, dumps, bucket_set, sys.stdout)
1408 @staticmethod
1409 def _output(policy_set, dumps, bucket_set, out):
1410 for label in sorted(policy_set):
1411 LOGGER.info('Applying a policy %s to...' % label)
1412 for dump in dumps:
1413 component_sizes = PolicyCommands._apply_policy(
1414 dump, policy_set[label], bucket_set, dump.time)
1415 out.write('%s for %s:\n' % (label, dump.path))
1416 for c in policy_set[label].components:
1417 if c in ['hour', 'minute', 'second']:
1418 out.write('%40s %12.3f\n' % (c, component_sizes[c]))
1419 else:
1420 out.write('%40s %12d\n' % (c, component_sizes[c]))
1422 bucket_set.clear_component_cache()
1424 return 0
1427 class MapCommand(Command):
1428 def __init__(self):
1429 super(MapCommand, self).__init__('Usage: %prog map <first-dump> <policy>')
1431 def do(self, sys_argv, out=sys.stdout):
1432 _, args = self._parse_args(sys_argv, 2)
1433 dump_path = args[1]
1434 target_policy = args[2]
1435 (bucket_set, dumps) = Command.load_basic_files(dump_path, True)
1436 policy_set = PolicySet.load(Command._parse_policy_list(target_policy))
1438 MapCommand._output(dumps, bucket_set, policy_set[target_policy], out)
1439 return 0
1441 @staticmethod
1442 def _output(dumps, bucket_set, policy, out):
1443 """Prints all stacktraces in a given component of given depth.
1445 Args:
1446 dumps: A list of Dump objects.
1447 bucket_set: A BucketSet object.
1448 policy: A Policy object.
1449 out: An IO object to output.
1451 max_dump_count = 0
1452 range_dict = ExclusiveRangeDict(ListAttribute)
1453 for dump in dumps:
1454 max_dump_count = max(max_dump_count, dump.count)
1455 for key, value in dump.iter_map:
1456 for begin, end, attr in range_dict.iter_range(key[0], key[1]):
1457 attr[dump.count] = value
1459 max_dump_count_digit = len(str(max_dump_count))
1460 for begin, end, attr in range_dict.iter_range():
1461 out.write('%x-%x\n' % (begin, end))
1462 if len(attr) < max_dump_count:
1463 attr[max_dump_count] = None
1464 for index, x in enumerate(attr[1:]):
1465 out.write(' #%0*d: ' % (max_dump_count_digit, index + 1))
1466 if not x:
1467 out.write('None\n')
1468 elif x[0] == 'hooked':
1469 attrs = x[1].split()
1470 assert len(attrs) == 3
1471 bucket_id = int(attrs[2])
1472 bucket = bucket_set.get(bucket_id)
1473 component = policy.find(bucket)
1474 out.write('hooked %s: %s @ %d\n' % (attrs[0], component, bucket_id))
1475 else:
1476 attrs = x[1].split()
1477 size = int(attrs[1])
1478 out.write('unhooked %s: %d bytes committed\n' % (attrs[0], size))
1481 class ExpandCommand(Command):
1482 def __init__(self):
1483 super(ExpandCommand, self).__init__(
1484 'Usage: %prog expand <dump> <policy> <component> <depth>')
1486 def do(self, sys_argv):
1487 _, args = self._parse_args(sys_argv, 4)
1488 dump_path = args[1]
1489 target_policy = args[2]
1490 component_name = args[3]
1491 depth = args[4]
1492 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
1493 policy_set = PolicySet.load(Command._parse_policy_list(target_policy))
1495 ExpandCommand._output(dump, policy_set[target_policy], bucket_set,
1496 component_name, int(depth), sys.stdout)
1497 return 0
1499 @staticmethod
1500 def _output(dump, policy, bucket_set, component_name, depth, out):
1501 """Prints all stacktraces in a given component of given depth.
1503 Args:
1504 dump: A Dump object.
1505 policy: A Policy object.
1506 bucket_set: A BucketSet object.
1507 component_name: A name of component for filtering.
1508 depth: An integer representing depth to be printed.
1509 out: An IO object to output.
1511 sizes = {}
1513 ExpandCommand._accumulate(
1514 dump, policy, bucket_set, component_name, depth, sizes)
1516 sorted_sizes_list = sorted(
1517 sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
1518 total = 0
1519 # TODO(dmikurube): Better formatting.
1520 for size_pair in sorted_sizes_list:
1521 out.write('%10d %s\n' % (size_pair[1], size_pair[0]))
1522 total += size_pair[1]
1523 LOGGER.info('total: %d\n' % total)
1525 @staticmethod
1526 def _accumulate(dump, policy, bucket_set, component_name, depth, sizes):
1527 for line in dump.iter_stacktrace:
1528 words = line.split()
1529 bucket = bucket_set.get(int(words[BUCKET_ID]))
1530 component_match = policy.find(bucket)
1531 if component_match == component_name:
1532 stacktrace_sequence = ''
1533 if bucket.typeinfo:
1534 stacktrace_sequence += '(type=%s)' % bucket.symbolized_typeinfo
1535 stacktrace_sequence += ' (type.name=%s) ' % bucket.typeinfo_name
1536 for function, sourcefile in zip(
1537 bucket.symbolized_stackfunction[
1538 0 : min(len(bucket.symbolized_stackfunction), 1 + depth)],
1539 bucket.symbolized_stacksourcefile[
1540 0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]):
1541 stacktrace_sequence += '%s(@%s) ' % (function, sourcefile)
1542 if not stacktrace_sequence in sizes:
1543 sizes[stacktrace_sequence] = 0
1544 sizes[stacktrace_sequence] += int(words[COMMITTED])
1547 class PProfCommand(Command):
1548 def __init__(self):
1549 super(PProfCommand, self).__init__(
1550 'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
1551 self._parser.add_option('-c', '--component', type='string',
1552 dest='component',
1553 help='restrict to COMPONENT', metavar='COMPONENT')
1555 def do(self, sys_argv):
1556 options, args = self._parse_args(sys_argv, 2)
1558 dump_path = args[1]
1559 target_policy = args[2]
1560 component = options.component
1562 (bucket_set, dump) = Command.load_basic_files(dump_path, False)
1563 policy_set = PolicySet.load(Command._parse_policy_list(target_policy))
1565 with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f:
1566 maps_lines = maps_f.readlines()
1567 PProfCommand._output(
1568 dump, policy_set[target_policy], bucket_set, maps_lines, component,
1569 sys.stdout)
1571 return 0
1573 @staticmethod
1574 def _output(dump, policy, bucket_set, maps_lines, component_name, out):
1575 """Converts the heap profile dump so it can be processed by pprof.
1577 Args:
1578 dump: A Dump object.
1579 policy: A Policy object.
1580 bucket_set: A BucketSet object.
1581 maps_lines: A list of strings containing /proc/.../maps.
1582 component_name: A name of component for filtering.
out: An IO object to output.
"""
1585 out.write('heap profile: ')
1586 com_committed, com_allocs = PProfCommand._accumulate(
1587 dump, policy, bucket_set, component_name)
1589 out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
1590 com_allocs, com_committed, com_allocs, com_committed))
1592 PProfCommand._output_stacktrace_lines(
1593 dump, policy, bucket_set, component_name, out)
1595 out.write('MAPPED_LIBRARIES:\n')
1596 for line in maps_lines:
1597 out.write(line)
1599 @staticmethod
1600 def _accumulate(dump, policy, bucket_set, component_name):
1601 """Accumulates size of committed chunks and the number of allocated chunks.
1603 Args:
1604 dump: A Dump object.
1605 policy: A Policy object.
1606 bucket_set: A BucketSet object.
1607 component_name: A name of component for filtering.
1609 Returns:
1610 Two integers which are the accumulated size of committed regions and the
number of allocated chunks, respectively.
"""
1613 com_committed = 0
1614 com_allocs = 0
1615 for line in dump.iter_stacktrace:
1616 words = line.split()
1617 bucket = bucket_set.get(int(words[BUCKET_ID]))
1618 if (not bucket or
1619 (component_name and component_name != policy.find(bucket))):
1620 continue
1622 com_committed += int(words[COMMITTED])
1623 com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])
1625 return com_committed, com_allocs
1627 @staticmethod
1628 def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out):
1629 """Prints information of stacktrace lines for pprof.
1631 Args:
1632 dump: A Dump object.
1633 policy: A Policy object.
1634 bucket_set: A BucketSet object.
1635 component_name: A name of component for filtering.
out: An IO object to output.
"""
1638 for line in dump.iter_stacktrace:
1639 words = line.split()
1640 bucket = bucket_set.get(int(words[BUCKET_ID]))
1641 if (not bucket or
1642 (component_name and component_name != policy.find(bucket))):
1643 continue
1645 out.write('%6d: %8s [%6d: %8s] @' % (
1646 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
1647 words[COMMITTED],
1648 int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
1649 words[COMMITTED]))
1650 for address in bucket.stacktrace:
1651 out.write(' 0x%016x' % address)
1652 out.write('\n')
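# A hypothetical sketch of the pprof-style record emitted above for a single
# stacktrace line (counts, sizes and addresses are made up):
#      8:     4096 [     8:     4096] @ 0x00007f3b2c001234 0x00007f3b2c005678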
1655 class UploadCommand(Command):
1656 def __init__(self):
1657 super(UploadCommand, self).__init__(
1658 'Usage: %prog upload [--gsutil path/to/gsutil] '
1659 '<first-dump> <destination-gs-path>')
1660 self._parser.add_option('--gsutil', default='gsutil',
1661 help='path to GSUTIL', metavar='GSUTIL')
1663 def do(self, sys_argv):
1664 options, args = self._parse_args(sys_argv, 2)
1665 dump_path = args[1]
1666 gs_path = args[2]
1668 dump_files = Command._find_all_dumps(dump_path)
1669 bucket_files = Command._find_all_buckets(dump_path)
1670 prefix = Command._find_prefix(dump_path)
1671 symbol_data_sources = SymbolDataSources(prefix)
1672 symbol_data_sources.prepare()
1673 symbol_path = symbol_data_sources.path()
1675 handle_zip, filename_zip = tempfile.mkstemp('.zip', 'dmprof')
1676 os.close(handle_zip)
1678 try:
1679 file_zip = zipfile.ZipFile(filename_zip, 'w', zipfile.ZIP_DEFLATED)
1680 for filename in dump_files:
1681 file_zip.write(filename, os.path.basename(os.path.abspath(filename)))
1682 for filename in bucket_files:
1683 file_zip.write(filename, os.path.basename(os.path.abspath(filename)))
1685 symbol_basename = os.path.basename(os.path.abspath(symbol_path))
1686 for filename in os.listdir(symbol_path):
1687 if not filename.startswith('.'):
1688 file_zip.write(os.path.join(symbol_path, filename),
1689 os.path.join(symbol_basename, os.path.basename(
1690 os.path.abspath(filename))))
1691 file_zip.close()
1693 returncode = UploadCommand._run_gsutil(
1694 options.gsutil, 'cp', '-a', 'public-read', filename_zip, gs_path)
1695 finally:
1696 os.remove(filename_zip)
1698 return returncode
1700 @staticmethod
1701 def _run_gsutil(gsutil, *args):
1702 """Run gsutil as a subprocess.
1704 Args:
1705 *args: Arguments to pass to gsutil. The first argument should be an
1706 operation such as ls, cp or cat.
1707 Returns:
The return code from the process.
"""
1710 command = [gsutil] + list(args)
1711 LOGGER.info("Running: %s", command)
1713 try:
1714 return subprocess.call(command)
1715 except OSError, e:
LOGGER.error('Failed to run gsutil: %s', e)
1719 def main():
1720 COMMANDS = {
1721 'buckets': BucketsCommand,
1722 'csv': CSVCommand,
1723 'expand': ExpandCommand,
1724 'json': JSONCommand,
1725 'list': ListCommand,
1726 'map': MapCommand,
1727 'pprof': PProfCommand,
1728 'stacktrace': StacktraceCommand,
'upload': UploadCommand,
}
1732 if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
1733 sys.stderr.write("""Usage: dmprof <command> [options] [<args>]
1735 Commands:
1736 buckets Dump a bucket list with resolving symbols
1737 csv Classify memory usage in CSV
1738 expand Show all stacktraces contained in the specified component
1739 json Classify memory usage in JSON
1740 list Classify memory usage in simple listing format
1741 map Show history of mapped regions
1742 pprof Format the profile dump so that it can be processed by pprof
1743 stacktrace Convert runtime addresses to symbol names
1744 upload Upload dumped files
1746 Quick Reference:
1747 dmprof buckets <first-dump>
1748 dmprof csv [-p POLICY] <first-dump>
1749 dmprof expand <dump> <policy> <component> <depth>
1750 dmprof json [-p POLICY] <first-dump>
1751 dmprof list [-p POLICY] <first-dump>
1752 dmprof map <first-dump> <policy>
1753 dmprof pprof [-c COMPONENT] <dump> <policy>
1754 dmprof stacktrace <dump>
1755 dmprof upload [--gsutil path/to/gsutil] <first-dump> <destination-gs-path>
1756 """)
1757 sys.exit(1)
1758 action = sys.argv.pop(1)
1760 LOGGER.setLevel(logging.DEBUG)
1761 handler = logging.StreamHandler()
1762 handler.setLevel(logging.INFO)
1763 formatter = logging.Formatter('%(message)s')
1764 handler.setFormatter(formatter)
1765 LOGGER.addHandler(handler)
1767 try:
1768 errorcode = COMMANDS[action]().do(sys.argv)
1769 except ParsingException, e:
1770 errorcode = 1
sys.stderr.write('Exiting due to a parsing error: %s\n' % e)
1773 return errorcode
1776 if __name__ == '__main__':
1777 sys.exit(main())