# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""The deep heap profiler script for Chrome."""

import copy
import datetime
import json
import logging
import optparse
import os
import re
import subprocess
import sys
import tempfile
import time
import zipfile

from range_dict import ExclusiveRangeDict

BASE_PATH = os.path.dirname(os.path.abspath(__file__))
FIND_RUNTIME_SYMBOLS_PATH = os.path.join(
    BASE_PATH, os.pardir, 'find_runtime_symbols')
sys.path.append(FIND_RUNTIME_SYMBOLS_PATH)

import find_runtime_symbols
import prepare_symbol_info
import proc_maps

from find_runtime_symbols import FUNCTION_SYMBOLS
from find_runtime_symbols import SOURCEFILE_SYMBOLS
from find_runtime_symbols import TYPEINFO_SYMBOLS

NULL_REGEX = re.compile('')

LOGGER = logging.getLogger('dmprof')
POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')

# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap;
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished by mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished by the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# POLICY_DEEP_3 is in JSON format.
POLICY_DEEP_3 = 'POLICY_DEEP_3'

# POLICY_DEEP_4 contains typeinfo.
POLICY_DEEP_4 = 'POLICY_DEEP_4'


class EmptyDumpException(Exception):
  def __init__(self, value=''):
    super(EmptyDumpException, self).__init__()
    self.value = value

  def __str__(self):
    return repr(self.value)


class ParsingException(Exception):
  def __init__(self, value=''):
    super(ParsingException, self).__init__()
    self.value = value

  def __str__(self):
    return repr(self.value)


class InvalidDumpException(ParsingException):
  def __init__(self, value):
    super(InvalidDumpException, self).__init__()
    self.value = value

  def __str__(self):
    return "invalid heap profile dump: %s" % repr(self.value)


class ObsoleteDumpVersionException(ParsingException):
  def __init__(self, value):
    super(ObsoleteDumpVersionException, self).__init__()
    self.value = value

  def __str__(self):
    return "obsolete heap profile dump version: %s" % repr(self.value)


class ListAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents a list for an attribute in range_dict.ExclusiveRangeDict."""
  def __init__(self):
    super(ListAttribute, self).__init__()
    self._list = []

  def __str__(self):
    return str(self._list)

  def __repr__(self):
    return 'ListAttribute' + str(self._list)

  def __len__(self):
    return len(self._list)

  def __getitem__(self, index):
    return self._list[index]

  def __setitem__(self, index, value):
    if index >= len(self._list):
      self._list.extend([None] * (index + 1 - len(self._list)))
    self._list[index] = value

  def copy(self):
    new_list = ListAttribute()
    for index, item in enumerate(self._list):
      new_list[index] = copy.deepcopy(item)
    return new_list


class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  _DUMMY_ENTRY = proc_maps.ProcMapsEntry(
      0,     # begin
      0,     # end
      '-',   # readable
      '-',   # writable
      '-',   # executable
      '-',   # private
      0,     # offset
      '00',  # major
      '00',  # minor
      0,     # inode
      ''     # name
      )

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    if key not in self._entry:
      raise KeyError(key)
    self._entry[key] = value

  def copy(self):
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
    return new_entry


def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) is False.

  Returns:
      A pair of an integer indicating the line number after skipping, and a
      boolean value which is True if a line for which |skipping_condition|
      is False was found before reaching |max_index|.
  """
  while skipping_condition(index):
    index += 1
    if index >= max_index:
      return index, False
  return index, True
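
# A minimal usage sketch for skip_while() (illustrative only; the line data is
# made up).  It scans forward until the condition stops holding:
#
#   lines = ['# comment\n', '# comment\n', 'GLOBAL_STATS:\n']
#   (ln, found) = skip_while(
#       0, len(lines), lambda n: lines[n] != 'GLOBAL_STATS:\n')
#   # ln == 2 and found == True; if no such line existed, found would be False.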


class SymbolDataSources(object):
  """Manages symbol data sources in a process.

  The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and
  so on.  They are collected into a directory '|prefix|.symmap' from the binary
  files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py.

  Binaries are not mandatory to profile.  The prepared data sources work in
  place of the binary even if the binary has been overwritten with another
  binary.

  Note that loading the symbol data sources takes a long time.  They are often
  very big.  So, the 'dmprof' profiler is designed to use 'SymbolMappingCache'
  which caches actually used symbols.
  """
  def __init__(self, prefix, fake_directories=None):
    self._prefix = prefix
    self._prepared_symbol_data_sources_path = None
    self._loaded_symbol_data_sources = None
    self._fake_directories = fake_directories or {}

  def prepare(self):
    """Prepares symbol data sources by extracting mapping from a binary.

    The prepared symbol data sources are stored in a directory.  The directory
    name is stored in |self._prepared_symbol_data_sources_path|.
    """
    LOGGER.info('Preparing symbol mapping...')
    self._prepared_symbol_data_sources_path, used_tempdir = (
        prepare_symbol_info.prepare_symbol_info(
            self._prefix + '.maps',
            output_dir_path=self._prefix + '.symmap',
            fake_directories=self._fake_directories,
            use_source_file_name=True))
    if self._prepared_symbol_data_sources_path:
      LOGGER.info('  Prepared symbol mapping.')
      if used_tempdir:
        LOGGER.warn('  Using a temporary directory for symbol mapping.')
        LOGGER.warn('  Delete it by yourself.')
        LOGGER.warn('  Or, move the directory by yourself to use it later.')
      return True
    else:
      LOGGER.warn('  Failed to prepare symbol mapping.')
      return False

  def get(self):
    """Returns the prepared symbol data sources.

    Returns:
        The prepared symbol data sources.  None if failed.
    """
    if not self._prepared_symbol_data_sources_path and not self.prepare():
      return None
    if not self._loaded_symbol_data_sources:
      LOGGER.info('Loading symbol mapping...')
      self._loaded_symbol_data_sources = (
          find_runtime_symbols.RuntimeSymbolsInProcess.load(
              self._prepared_symbol_data_sources_path))
    return self._loaded_symbol_data_sources

  def path(self):
    """Returns the path of the prepared symbol data sources if possible."""
    if not self._prepared_symbol_data_sources_path and not self.prepare():
      return None
    return self._prepared_symbol_data_sources_path


class SymbolFinder(object):
  """Finds corresponding symbols from addresses.

  This class only looks up ('find()') symbols for a specified |address_list|.
  It is introduced to make a finder mockable.
  """
  def __init__(self, symbol_type, symbol_data_sources):
    self._symbol_type = symbol_type
    self._symbol_data_sources = symbol_data_sources

  def find(self, address_list):
    return find_runtime_symbols.find_runtime_symbols(
        self._symbol_type, self._symbol_data_sources.get(), address_list)


class SymbolMappingCache(object):
  """Caches mapping from actually used addresses to symbols.

  'update()' updates the cache from the original symbol data sources via
  'SymbolFinder'.  Symbols can be looked up by the method 'lookup()'.
  """
  def __init__(self):
    self._symbol_mapping_caches = {
        FUNCTION_SYMBOLS: {},
        SOURCEFILE_SYMBOLS: {},
        TYPEINFO_SYMBOLS: {},
        }

  def update(self, symbol_type, bucket_set, symbol_finder, cache_f):
    """Updates symbol mapping cache on memory and in a symbol cache file.

    It reads cached symbol mapping from a symbol cache file |cache_f| if it
    exists.  Unresolved addresses are then resolved and added to the cache
    both on memory and in the symbol cache file with using 'SymbolFinder'.

    A cache file is formatted as follows:
        <hex address> <symbol>
        <hex address> <symbol>
        ...

    Args:
        symbol_type: A type of symbols to update.  It should be one of
            FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
        bucket_set: A BucketSet object.
        symbol_finder: A SymbolFinder object to find symbols.
        cache_f: A readable and writable IO object of the symbol cache file.
    """
    cache_f.seek(0, os.SEEK_SET)
    self._load(cache_f, symbol_type)

    unresolved_addresses = sorted(
        address for address in bucket_set.iter_addresses(symbol_type)
        if address not in self._symbol_mapping_caches[symbol_type])

    if not unresolved_addresses:
      LOGGER.info('No need to resolve any more addresses.')
      return

    cache_f.seek(0, os.SEEK_END)
    LOGGER.info('Loading %d unresolved addresses.' %
                len(unresolved_addresses))
    symbol_dict = symbol_finder.find(unresolved_addresses)

    for address, symbol in symbol_dict.iteritems():
      stripped_symbol = symbol.strip() or '?'
      self._symbol_mapping_caches[symbol_type][address] = stripped_symbol
      cache_f.write('%x %s\n' % (address, stripped_symbol))

  def lookup(self, symbol_type, address):
    """Looks up a symbol for a given |address|.

    Args:
        symbol_type: A type of symbols to look up.  It should be one of
            FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
        address: An integer that represents an address.

    Returns:
        A string that represents a symbol.
    """
    return self._symbol_mapping_caches[symbol_type].get(address)

  def _load(self, cache_f, symbol_type):
    try:
      for line in cache_f:
        items = line.rstrip().split(None, 1)
        self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1]
      LOGGER.info('Loaded %d entries from symbol cache.' %
                  len(self._symbol_mapping_caches[symbol_type]))
    except ValueError, e:
      LOGGER.info('The symbol cache file is invalid: %s' % e)
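
# A minimal usage sketch for SymbolMappingCache, assuming a prepared
# |symbol_data_sources| and a loaded |bucket_set| ('chrome.cache.function' is a
# made-up file name).  The cache file simply holds '<hex address> <symbol>'
# lines as written by update() above:
#
#   cache = SymbolMappingCache()
#   with open('chrome.cache.function', 'a+') as cache_f:
#     cache.update(FUNCTION_SYMBOLS, bucket_set,
#                  SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f)
#   name = cache.lookup(FUNCTION_SYMBOLS, 0x7f0042)  # a symbol string, or None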


class Rule(object):
  """Represents one matching rule in a policy file."""

  def __init__(self,
               name,
               mmap,
               stackfunction_pattern=None,
               stacksourcefile_pattern=None,
               typeinfo_pattern=None):
    self._name = name
    self._mmap = mmap

    self._stackfunction_pattern = None
    if stackfunction_pattern:
      self._stackfunction_pattern = re.compile(
          stackfunction_pattern + r'\Z')

    self._stacksourcefile_pattern = None
    if stacksourcefile_pattern:
      self._stacksourcefile_pattern = re.compile(
          stacksourcefile_pattern + r'\Z')

    self._typeinfo_pattern = None
    if typeinfo_pattern:
      self._typeinfo_pattern = re.compile(typeinfo_pattern + r'\Z')

  @property
  def name(self):
    return self._name

  @property
  def mmap(self):
    return self._mmap

  @property
  def stackfunction_pattern(self):
    return self._stackfunction_pattern

  @property
  def stacksourcefile_pattern(self):
    return self._stacksourcefile_pattern

  @property
  def typeinfo_pattern(self):
    return self._typeinfo_pattern


class Policy(object):
  """Represents a policy, a content of a policy file."""

  def __init__(self, rules, version, components):
    self._rules = rules
    self._version = version
    self._components = components

  @property
  def components(self):
    return self._components

  def find(self, bucket):
    """Finds a matching component name which a given |bucket| belongs to.

    Args:
        bucket: A Bucket object to be searched for.

    Returns:
        A string representing a component name.
    """
    if bucket.component_cache:
      return bucket.component_cache

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    typeinfo = bucket.symbolized_typeinfo
    if typeinfo.startswith('0x'):
      typeinfo = bucket.typeinfo_name

    for rule in self._rules:
      if (bucket.mmap == rule.mmap and
          (not rule.stackfunction_pattern or
           rule.stackfunction_pattern.match(stackfunction)) and
          (not rule.stacksourcefile_pattern or
           rule.stacksourcefile_pattern.match(stacksourcefile)) and
          (not rule.typeinfo_pattern or rule.typeinfo_pattern.match(typeinfo))):
        bucket.component_cache = rule.name
        return rule.name

  @staticmethod
  def load(filename, filetype):
    """Loads a policy file of |filename| in a |format|.

    Args:
        filename: A filename to be loaded.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object.
    """
    with open(os.path.join(BASE_PATH, filename)) as policy_f:
      return Policy.parse(policy_f, filetype)

  @staticmethod
  def parse(policy_f, filetype):
    """Parses a policy file content in a |format|.

    Args:
        policy_f: An IO object to be loaded.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object.
    """
    if filetype == 'json':
      return Policy._parse_json(policy_f)

  @staticmethod
  def _parse_json(policy_f):
    """Parses a policy file in JSON format.

    A policy file contains components' names and their stacktrace patterns
    written as regular expressions.  Those patterns are matched against the
    symbols of each stacktrace in the order written in the policy file.

    Args:
        policy_f: A File/IO object to read.

    Returns:
        A loaded Policy object.
    """
    policy = json.load(policy_f)

    rules = []
    for rule in policy['rules']:
      stackfunction = rule.get('stackfunction') or rule.get('stacktrace')
      stacksourcefile = rule.get('stacksourcefile')
      rules.append(Rule(
          rule['name'],
          rule['allocator'] == 'mmap',
          stackfunction,
          stacksourcefile,
          rule['typeinfo'] if 'typeinfo' in rule else None))

    return Policy(rules, policy['version'], policy['components'])
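
# A minimal sketch of the JSON accepted by Policy._parse_json() above.  The
# component names and patterns are made up; only the keys ('components',
# 'rules', 'version', and per-rule 'name', 'allocator', 'stackfunction',
# 'stacksourcefile', 'typeinfo') follow the parser:
#
#   {
#     "components": ["mmap-v8-heap", "tc-std-string"],
#     "rules": [
#       {"name": "mmap-v8-heap", "allocator": "mmap",
#        "stackfunction": ".*v8::.*"},
#       {"name": "tc-std-string", "allocator": "malloc",
#        "typeinfo": "std::basic_string.*"}
#     ],
#     "version": "POLICY_DEEP_4"
#   }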


class PolicySet(object):
  """Represents a set of policies."""

  def __init__(self, policy_directory):
    self._policy_directory = policy_directory

  @staticmethod
  def load(labels=None):
    """Loads a set of policies via the "default policy directory".

    The "default policy directory" contains pairs of policies and their labels.
    For example, a policy "policy.l0.json" is labeled "l0" in the default
    policy directory "policies.json".

    All policies in the directory are loaded by default.  Policies can be
    limited with |labels|.

    Args:
        labels: An array that contains policy labels to be loaded.
    """
    default_policy_directory = PolicySet._load_default_policy_directory()
    if labels:
      specified_policy_directory = {}
      for label in labels:
        if label in default_policy_directory:
          specified_policy_directory[label] = default_policy_directory[label]
        # TODO(dmikurube): Load an un-labeled policy file.
      return PolicySet._load_policies(specified_policy_directory)
    else:
      return PolicySet._load_policies(default_policy_directory)

  def __len__(self):
    return len(self._policy_directory)

  def __iter__(self):
    for label in self._policy_directory:
      yield label

  def __getitem__(self, label):
    return self._policy_directory[label]

  @staticmethod
  def _load_default_policy_directory():
    with open(POLICIES_JSON_PATH, mode='r') as policies_f:
      default_policy_directory = json.load(policies_f)
    return default_policy_directory

  @staticmethod
  def _load_policies(directory):
    LOGGER.info('Loading policy files.')
    policies = {}
    for label in directory:
      LOGGER.info('  %s: %s' % (label, directory[label]['file']))
      loaded = Policy.load(directory[label]['file'], directory[label]['format'])
      policies[label] = loaded
    return PolicySet(policies)
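
# A sketch of the "default policy directory" layout in policies.json, as read
# by _load_default_policy_directory() and _load_policies() above.  The labels
# and file names here are illustrative placeholders:
#
#   {
#     "l0": {"file": "policy.l0.json", "format": "json"},
#     "l1": {"file": "policy.l1.json", "format": "json"}
#   }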


class Bucket(object):
  """Represents a bucket, which is a unit of memory block classification."""

  def __init__(self, stacktrace, mmap, typeinfo, typeinfo_name):
    self._stacktrace = stacktrace
    self._mmap = mmap
    self._typeinfo = typeinfo
    self._typeinfo_name = typeinfo_name

    self._symbolized_stackfunction = stacktrace
    self._symbolized_joined_stackfunction = ''
    self._symbolized_stacksourcefile = stacktrace
    self._symbolized_joined_stacksourcefile = ''
    self._symbolized_typeinfo = typeinfo_name

    self.component_cache = ''

  def __str__(self):
    result = []
    result.append('mmap' if self._mmap else 'malloc')
    if self._symbolized_typeinfo == 'no typeinfo':
      result.append('tno_typeinfo')
    else:
      result.append('t' + self._symbolized_typeinfo)
    result.append('n' + self._typeinfo_name)
    result.extend(['%s(@%s)' % (function, sourcefile)
                   for function, sourcefile
                   in zip(self._symbolized_stackfunction,
                          self._symbolized_stacksourcefile)])
    return ' '.join(result)

  def symbolize(self, symbol_mapping_cache):
    """Makes a symbolized stacktrace and typeinfo with |symbol_mapping_cache|.

    Args:
        symbol_mapping_cache: A SymbolMappingCache object.
    """
    # TODO(dmikurube): Fill explicitly with numbers if symbol not found.
    self._symbolized_stackfunction = [
        symbol_mapping_cache.lookup(FUNCTION_SYMBOLS, address)
        for address in self._stacktrace]
    self._symbolized_joined_stackfunction = ' '.join(
        self._symbolized_stackfunction)
    self._symbolized_stacksourcefile = [
        symbol_mapping_cache.lookup(SOURCEFILE_SYMBOLS, address)
        for address in self._stacktrace]
    self._symbolized_joined_stacksourcefile = ' '.join(
        self._symbolized_stacksourcefile)
    if not self._typeinfo:
      self._symbolized_typeinfo = 'no typeinfo'
    else:
      self._symbolized_typeinfo = symbol_mapping_cache.lookup(
          TYPEINFO_SYMBOLS, self._typeinfo)
      if not self._symbolized_typeinfo:
        self._symbolized_typeinfo = 'no typeinfo'

  def clear_component_cache(self):
    self.component_cache = ''

  @property
  def stacktrace(self):
    return self._stacktrace

  @property
  def mmap(self):
    return self._mmap

  @property
  def typeinfo(self):
    return self._typeinfo

  @property
  def typeinfo_name(self):
    return self._typeinfo_name

  @property
  def symbolized_stackfunction(self):
    return self._symbolized_stackfunction

  @property
  def symbolized_joined_stackfunction(self):
    return self._symbolized_joined_stackfunction

  @property
  def symbolized_stacksourcefile(self):
    return self._symbolized_stacksourcefile

  @property
  def symbolized_joined_stacksourcefile(self):
    return self._symbolized_joined_stacksourcefile

  @property
  def symbolized_typeinfo(self):
    return self._symbolized_typeinfo


class BucketSet(object):
  """Represents a set of buckets."""
  def __init__(self):
    self._buckets = {}
    self._code_addresses = set()
    self._typeinfo_addresses = set()

  def load(self, prefix):
    """Loads all related bucket files.

    Args:
        prefix: A prefix string for bucket file names.
    """
    LOGGER.info('Loading bucket files.')

    n = 0
    while True:
      path = '%s.%04d.buckets' % (prefix, n)
      if not os.path.exists(path):
        break
      LOGGER.info('  %s' % path)
      with open(path, 'r') as f:
        self._load_file(f)
      n += 1

  def _load_file(self, bucket_f):
    for line in bucket_f:
      words = line.split()
      typeinfo = None
      typeinfo_name = ''
      stacktrace_begin = 2
      for index, word in enumerate(words):
        if index < 2:
          continue
        if word[0] == 't':
          typeinfo = int(word[1:], 16)
          self._typeinfo_addresses.add(typeinfo)
        elif word[0] == 'n':
          typeinfo_name = word[1:]
        else:
          stacktrace_begin = index
          break
      stacktrace = [int(address, 16) for address in words[stacktrace_begin:]]
      for frame in stacktrace:
        self._code_addresses.add(frame)
      self._buckets[int(words[0])] = Bucket(
          stacktrace, words[1] == 'mmap', typeinfo, typeinfo_name)
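
  # A sketch of one bucket-file line as _load_file() above reads it (addresses
  # and the type name are made up): bucket id, allocator type, an optional
  # 't'-prefixed typeinfo address, an optional 'n'-prefixed type name, then the
  # stack frame addresses in hex:
  #
  #   123 malloc t7f00aa nstd::string 7f0010 7f0020 7f0030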

  def __iter__(self):
    for bucket_id, bucket_content in self._buckets.iteritems():
      yield bucket_id, bucket_content

  def __getitem__(self, bucket_id):
    return self._buckets[bucket_id]

  def get(self, bucket_id):
    return self._buckets.get(bucket_id)

  def symbolize(self, symbol_mapping_cache):
    for bucket_content in self._buckets.itervalues():
      bucket_content.symbolize(symbol_mapping_cache)

  def clear_component_cache(self):
    for bucket_content in self._buckets.itervalues():
      bucket_content.clear_component_cache()

  def iter_addresses(self, symbol_type):
    if symbol_type in [FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS]:
      for function in self._code_addresses:
        yield function
    else:
      for function in self._typeinfo_addresses:
        yield function


class Dump(object):
  """Represents a heap profile dump."""

  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')

  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  _TIME_PATTERN = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')

  def __init__(self, path, modified_time):
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

  @property
  def path(self):
    return self._path

  @property
  def count(self):
    return self._count

  @property
  def time(self):
    return self._time

  @property
  def iter_map(self):
    for region in sorted(self._map.iteritems()):
      yield region[0], region[1]

  def iter_procmaps(self):
    for begin, end, attr in self._map.iter_range():
      yield begin, end, attr

  @property
  def iter_stacktrace(self):
    for line in self._stacktrace_lines:
      yield line

  def global_stat(self, name):
    return self._global_stats[name]

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
        path: A file path string to load.
        log_header: A preceding string for log messages.

    Returns:
        A loaded Dump object.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    dump = Dump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump

  def load_file(self, f, log_header):
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
    except ParsingException, e:
      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
      raise
    else:
      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
        A pair of (a string representing a version of the stacktrace dump,
        and an integer indicating a line number next to the version string).

    Raises:
        ParsingException for invalid dump versions.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return version, ln

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats."""
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']

    for prefix in global_stat_names:
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    if self._lines[ln].startswith('Time:'):
      matched = self._TIME_PATTERN.match(self._lines[ln])
      if matched:
        self._time = time.mktime(datetime.datetime.strptime(
            matched.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
        if matched.group(2):
          self._time += float(matched.group(2)[1:]) / 1000.0

  def _parse_mmap_list(self):
    """Parses lines in self._lines as a mmap list."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return
    ln += 1

    self._map = {}
    while True:
      entry = proc_maps.ProcMaps.parse_line(self._lines[ln])
      if entry:
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().iteritems():
            attr[key] = value
        ln += 1
        continue
      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # 2: starting address
      # 5: end address
      # 7: hooked or unhooked
      # 8: additional information
      self._map[(int(matched.group(2), 16),
                 int(matched.group(5), 16))] = (matched.group(7),
                                                matched.group(8))
      ln += 1

  def _extract_stacktrace_lines(self, line_number):
    """Extracts the position of stacktrace lines.

    Valid stacktrace lines are stored into self._stacktrace_lines.

    Args:
        line_number: A line number to start parsing in lines.

    Raises:
        ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: not self._lines[n].split()[0].isdigit())
      stacktrace_start = line_number
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:line_number]
    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)
    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given stacktrace_line is valid as stacktrace.

    Args:
        stacktrace_line: A string to be checked.

    Returns:
        True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    if words[BUCKET_ID - 1] != '@':
      return False
    return True
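
  # The stacktrace lines checked above hold a row of per-bucket counters (the
  # COMMITTED, ALLOC_COUNT and FREE_COUNT columns used by the commands below),
  # an '@' marker in the column just before BUCKET_ID, and the bucket id
  # itself in the BUCKET_ID column.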


class DumpList(object):
  """Represents a sequence of heap profile dumps."""

  def __init__(self, dump_list):
    self._dump_list = dump_list

  @staticmethod
  def load(path_list):
    LOGGER.info('Loading heap dump profiles.')
    dump_list = []
    for path in path_list:
      dump_list.append(Dump.load(path, ' '))
    return DumpList(dump_list)

  def __len__(self):
    return len(self._dump_list)

  def __iter__(self):
    for dump in self._dump_list:
      yield dump

  def __getitem__(self, index):
    return self._dump_list[index]


class Command(object):
  """Subclasses are a subcommand for this executable.

  See COMMANDS in main().
  """
  def __init__(self, usage):
    self._parser = optparse.OptionParser(usage)

  @staticmethod
  def load_basic_files(
      dump_path, multiple, no_dump=False, fake_directories=None):
    prefix = Command._find_prefix(dump_path)
    symbol_data_sources = SymbolDataSources(prefix, fake_directories or {})
    symbol_data_sources.prepare()
    bucket_set = BucketSet()
    bucket_set.load(prefix)
    if not no_dump:
      if multiple:
        dump_list = DumpList.load(Command._find_all_dumps(dump_path))
      else:
        dump = Dump.load(dump_path)
    symbol_mapping_cache = SymbolMappingCache()
    with open(prefix + '.cache.function', 'a+') as cache_f:
      symbol_mapping_cache.update(
          FUNCTION_SYMBOLS, bucket_set,
          SymbolFinder(FUNCTION_SYMBOLS, symbol_data_sources), cache_f)
    with open(prefix + '.cache.typeinfo', 'a+') as cache_f:
      symbol_mapping_cache.update(
          TYPEINFO_SYMBOLS, bucket_set,
          SymbolFinder(TYPEINFO_SYMBOLS, symbol_data_sources), cache_f)
    with open(prefix + '.cache.sourcefile', 'a+') as cache_f:
      symbol_mapping_cache.update(
          SOURCEFILE_SYMBOLS, bucket_set,
          SymbolFinder(SOURCEFILE_SYMBOLS, symbol_data_sources), cache_f)
    bucket_set.symbolize(symbol_mapping_cache)
    if no_dump:
      return bucket_set
    elif multiple:
      return (bucket_set, dump_list)
    else:
      return (bucket_set, dump)

  @staticmethod
  def _find_prefix(path):
    return re.sub('\.[0-9][0-9][0-9][0-9]\.heap', '', path)

  @staticmethod
  def _find_all_dumps(dump_path):
    prefix = Command._find_prefix(dump_path)
    dump_path_list = [dump_path]

    n = int(dump_path[len(dump_path) - 9 : len(dump_path) - 5])
    n += 1
    while True:
      p = '%s.%04d.heap' % (prefix, n)
      if os.path.exists(p):
        dump_path_list.append(p)
      else:
        break
      n += 1

    return dump_path_list

  @staticmethod
  def _find_all_buckets(dump_path):
    prefix = Command._find_prefix(dump_path)
    bucket_path_list = []

    n = 0
    while True:
      path = '%s.%04d.buckets' % (prefix, n)
      if not os.path.exists(path):
        break
      bucket_path_list.append(path)
      n += 1

    return bucket_path_list

  def _parse_args(self, sys_argv, required):
    options, args = self._parser.parse_args(sys_argv)
    if len(args) != required + 1:
      self._parser.error('needs %d argument(s).\n' % required)
    return (options, args)

  @staticmethod
  def _parse_policy_list(options_policy):
    if options_policy:
      return options_policy.split(',')
    else:
      return None


class BucketsCommand(Command):
  def __init__(self):
    super(BucketsCommand, self).__init__('Usage: %prog buckets <first-dump>')

  def do(self, sys_argv, out=sys.stdout):
    _, args = self._parse_args(sys_argv, 1)
    dump_path = args[1]
    bucket_set = Command.load_basic_files(dump_path, True, True)

    BucketsCommand._output(bucket_set, out)
    return 0

  @staticmethod
  def _output(bucket_set, out):
    """Prints all buckets with resolving symbols.

    Args:
        bucket_set: A BucketSet object.
        out: An IO object to output.
    """
    for bucket_id, bucket in sorted(bucket_set):
      out.write('%d: %s\n' % (bucket_id, bucket))


class StacktraceCommand(Command):
  def __init__(self):
    super(StacktraceCommand, self).__init__(
        'Usage: %prog stacktrace <dump>')

  def do(self, sys_argv):
    _, args = self._parse_args(sys_argv, 1)
    dump_path = args[1]
    (bucket_set, dump) = Command.load_basic_files(dump_path, False)

    StacktraceCommand._output(dump, bucket_set, sys.stdout)
    return 0

  @staticmethod
  def _output(dump, bucket_set, out):
    """Outputs a given stacktrace.

    Args:
        bucket_set: A BucketSet object.
        out: A file object to output.
    """
    for line in dump.iter_stacktrace:
      words = line.split()
      bucket = bucket_set.get(int(words[BUCKET_ID]))
      if not bucket:
        continue
      for i in range(0, BUCKET_ID - 1):
        out.write(words[i] + ' ')
      for frame in bucket.symbolized_stackfunction:
        out.write(frame + ' ')
      out.write('\n')


class PolicyCommands(Command):
  def __init__(self, command):
    super(PolicyCommands, self).__init__(
        'Usage: %%prog %s [-p POLICY] <first-dump>' % command)
    self._parser.add_option('-p', '--policy', type='string', dest='policy',
                            help='profile with POLICY', metavar='POLICY')
    self._parser.add_option('--fake-directories', dest='fake_directories',
                            metavar='/path/on/target@/path/on/host[:...]',
                            help='Read files in /path/on/host/ instead of '
                                 'files in /path/on/target/.')

  def _set_up(self, sys_argv):
    options, args = self._parse_args(sys_argv, 1)
    dump_path = args[1]
    fake_directories_dict = {}
    if options.fake_directories:
      for fake_directory_pair in options.fake_directories.split(':'):
        target_path, host_path = fake_directory_pair.split('@', 1)
        fake_directories_dict[target_path] = host_path
    (bucket_set, dumps) = Command.load_basic_files(
        dump_path, True, fake_directories=fake_directories_dict)

    policy_set = PolicySet.load(Command._parse_policy_list(options.policy))
    return policy_set, dumps, bucket_set

  @staticmethod
  def _apply_policy(dump, policy, bucket_set, first_dump_time):
    """Aggregates the total memory size of each component.

    Iterates through all stacktraces and attributes them to one of the
    components based on the policy.  It is important to apply the policy rules
    in the right order.

    Args:
        dump: A Dump object.
        policy: A Policy object.
        bucket_set: A BucketSet object.
        first_dump_time: An integer representing the time when the first dump
            was taken.

    Returns:
        A dict mapping components to their corresponding sizes.
    """
    LOGGER.info('  %s' % dump.path)
    sizes = dict((c, 0) for c in policy.components)

    PolicyCommands._accumulate(dump, policy, bucket_set, sizes)

    sizes['mmap-no-log'] = (
        dump.global_stat('profiled-mmap_committed') -
        sizes['mmap-total-log'])
    sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
    sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')

    sizes['tc-no-log'] = (
        dump.global_stat('profiled-malloc_committed') -
        sizes['tc-total-log'])
    sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
    sizes['tc-unused'] = (
        sizes['mmap-tcmalloc'] -
        dump.global_stat('profiled-malloc_committed'))
    sizes['tc-total'] = sizes['mmap-tcmalloc']

    for key, value in {
        'total': 'total_committed',
        'filemapped': 'file_committed',
        'absent': 'absent_committed',
        'file-exec': 'file-exec_committed',
        'file-nonexec': 'file-nonexec_committed',
        'anonymous': 'anonymous_committed',
        'stack': 'stack_committed',
        'other': 'other_committed',
        'unhooked-absent': 'nonprofiled-absent_committed',
        'unhooked-anonymous': 'nonprofiled-anonymous_committed',
        'unhooked-file-exec': 'nonprofiled-file-exec_committed',
        'unhooked-file-nonexec': 'nonprofiled-file-nonexec_committed',
        'unhooked-stack': 'nonprofiled-stack_committed',
        'unhooked-other': 'nonprofiled-other_committed',
        'total-vm': 'total_virtual',
        'filemapped-vm': 'file_virtual',
        'anonymous-vm': 'anonymous_virtual',
        'other-vm': 'other_virtual'}.iteritems():
      if key in sizes:
        sizes[key] = dump.global_stat(value)

    if 'mustbezero' in sizes:
      removed_list = (
          'profiled-mmap_committed',
          'nonprofiled-absent_committed',
          'nonprofiled-anonymous_committed',
          'nonprofiled-file-exec_committed',
          'nonprofiled-file-nonexec_committed',
          'nonprofiled-stack_committed',
          'nonprofiled-other_committed')
      sizes['mustbezero'] = (
          dump.global_stat('total_committed') -
          sum(dump.global_stat(removed) for removed in removed_list))
    if 'total-exclude-profiler' in sizes:
      sizes['total-exclude-profiler'] = (
          dump.global_stat('total_committed') -
          (sizes['mmap-profiler'] + sizes['mmap-type-profiler']))

    if 'hour' in sizes:
      sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
    if 'minute' in sizes:
      sizes['minute'] = (dump.time - first_dump_time) / 60.0
    if 'second' in sizes:
      sizes['second'] = dump.time - first_dump_time

    return sizes

  @staticmethod
  def _accumulate(dump, policy, bucket_set, sizes):
    for line in dump.iter_stacktrace:
      words = line.split()
      bucket = bucket_set.get(int(words[BUCKET_ID]))
      component_match = policy.find(bucket)
      sizes[component_match] += int(words[COMMITTED])

      if component_match.startswith('tc-'):
        sizes['tc-total-log'] += int(words[COMMITTED])
      elif component_match.startswith('mmap-'):
        sizes['mmap-total-log'] += int(words[COMMITTED])
      else:
        sizes['other-total-log'] += int(words[COMMITTED])


class CSVCommand(PolicyCommands):
  def __init__(self):
    super(CSVCommand, self).__init__('csv')

  def do(self, sys_argv):
    policy_set, dumps, bucket_set = self._set_up(sys_argv)
    return CSVCommand._output(policy_set, dumps, bucket_set, sys.stdout)

  @staticmethod
  def _output(policy_set, dumps, bucket_set, out):
    max_components = 0
    for label in policy_set:
      max_components = max(max_components, len(policy_set[label].components))

    for label in sorted(policy_set):
      components = policy_set[label].components
      if len(policy_set) > 1:
        out.write('%s%s\n' % (label, ',' * (max_components - 1)))
      out.write('%s%s\n' % (
          ','.join(components), ',' * (max_components - len(components))))

      LOGGER.info('Applying a policy %s to...' % label)
      for dump in dumps:
        component_sizes = PolicyCommands._apply_policy(
            dump, policy_set[label], bucket_set, dumps[0].time)
        s = []
        for c in components:
          if c in ('hour', 'minute', 'second'):
            s.append('%05.5f' % (component_sizes[c]))
          else:
            s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
        out.write('%s%s\n' % (
            ','.join(s), ',' * (max_components - len(components))))

      bucket_set.clear_component_cache()

    return 0


class JSONCommand(PolicyCommands):
  def __init__(self):
    super(JSONCommand, self).__init__('json')

  def do(self, sys_argv):
    policy_set, dumps, bucket_set = self._set_up(sys_argv)
    return JSONCommand._output(policy_set, dumps, bucket_set, sys.stdout)

  @staticmethod
  def _output(policy_set, dumps, bucket_set, out):
    json_base = {
        'version': 'JSON_DEEP_2',
        'policies': {},
        }

    for label in sorted(policy_set):
      json_base['policies'][label] = {
          'legends': policy_set[label].components,
          'snapshots': [],
          }

      LOGGER.info('Applying a policy %s to...' % label)
      for dump in dumps:
        component_sizes = PolicyCommands._apply_policy(
            dump, policy_set[label], bucket_set, dumps[0].time)
        component_sizes['dump_path'] = dump.path
        component_sizes['dump_time'] = datetime.datetime.fromtimestamp(
            dump.time).strftime('%Y-%m-%d %H:%M:%S')
        json_base['policies'][label]['snapshots'].append(component_sizes)

      bucket_set.clear_component_cache()

    json.dump(json_base, out, indent=2, sort_keys=True)

    return 0


class ListCommand(PolicyCommands):
  def __init__(self):
    super(ListCommand, self).__init__('list')

  def do(self, sys_argv):
    policy_set, dumps, bucket_set = self._set_up(sys_argv)
    return ListCommand._output(policy_set, dumps, bucket_set, sys.stdout)

  @staticmethod
  def _output(policy_set, dumps, bucket_set, out):
    for label in sorted(policy_set):
      LOGGER.info('Applying a policy %s to...' % label)
      for dump in dumps:
        component_sizes = PolicyCommands._apply_policy(
            dump, policy_set[label], bucket_set, dump.time)
        out.write('%s for %s:\n' % (label, dump.path))
        for c in policy_set[label].components:
          if c in ['hour', 'minute', 'second']:
            out.write('%40s %12.3f\n' % (c, component_sizes[c]))
          else:
            out.write('%40s %12d\n' % (c, component_sizes[c]))

      bucket_set.clear_component_cache()

    return 0


class MapCommand(Command):
  def __init__(self):
    super(MapCommand, self).__init__('Usage: %prog map <first-dump> <policy>')

  def do(self, sys_argv, out=sys.stdout):
    _, args = self._parse_args(sys_argv, 2)
    dump_path = args[1]
    target_policy = args[2]
    (bucket_set, dumps) = Command.load_basic_files(dump_path, True)
    policy_set = PolicySet.load(Command._parse_policy_list(target_policy))

    MapCommand._output(dumps, bucket_set, policy_set[target_policy], out)
    return 0

  @staticmethod
  def _output(dumps, bucket_set, policy, out):
    """Prints all stacktraces in a given component of given depth.

    Args:
        dumps: A list of Dump objects.
        bucket_set: A BucketSet object.
        policy: A Policy object.
        out: An IO object to output.
    """
    max_dump_count = 0
    range_dict = ExclusiveRangeDict(ListAttribute)
    for dump in dumps:
      max_dump_count = max(max_dump_count, dump.count)
      for key, value in dump.iter_map:
        for begin, end, attr in range_dict.iter_range(key[0], key[1]):
          attr[dump.count] = value

    max_dump_count_digit = len(str(max_dump_count))
    for begin, end, attr in range_dict.iter_range():
      out.write('%x-%x\n' % (begin, end))
      if len(attr) < max_dump_count:
        attr[max_dump_count] = None
      for index, x in enumerate(attr[1:]):
        out.write(' #%0*d: ' % (max_dump_count_digit, index + 1))
        if not x:
          out.write('None\n')
        elif x[0] == 'hooked':
          attrs = x[1].split()
          assert len(attrs) == 3
          bucket_id = int(attrs[2])
          bucket = bucket_set.get(bucket_id)
          component = policy.find(bucket)
          out.write('hooked %s: %s @ %d\n' % (attrs[0], component, bucket_id))
        else:
          attrs = x[1].split()
          size = int(attrs[1])
          out.write('unhooked %s: %d bytes committed\n' % (attrs[0], size))


class ExpandCommand(Command):
  def __init__(self):
    super(ExpandCommand, self).__init__(
        'Usage: %prog expand <dump> <policy> <component> <depth>')

  def do(self, sys_argv):
    _, args = self._parse_args(sys_argv, 4)
    dump_path = args[1]
    target_policy = args[2]
    component_name = args[3]
    depth = args[4]
    (bucket_set, dump) = Command.load_basic_files(dump_path, False)
    policy_set = PolicySet.load(Command._parse_policy_list(target_policy))

    ExpandCommand._output(dump, policy_set[target_policy], bucket_set,
                          component_name, int(depth), sys.stdout)
    return 0

  @staticmethod
  def _output(dump, policy, bucket_set, component_name, depth, out):
    """Prints all stacktraces in a given component of given depth.

    Args:
        dump: A Dump object.
        policy: A Policy object.
        bucket_set: A BucketSet object.
        component_name: A name of component for filtering.
        depth: An integer representing depth to be printed.
        out: An IO object to output.
    """
    sizes = {}

    ExpandCommand._accumulate(
        dump, policy, bucket_set, component_name, depth, sizes)

    sorted_sizes_list = sorted(
        sizes.iteritems(), key=(lambda x: x[1]), reverse=True)
    total = 0
    # TODO(dmikurube): Better formatting.
    for size_pair in sorted_sizes_list:
      out.write('%10d %s\n' % (size_pair[1], size_pair[0]))
      total += size_pair[1]
    LOGGER.info('total: %d\n' % total)

  @staticmethod
  def _accumulate(dump, policy, bucket_set, component_name, depth, sizes):
    for line in dump.iter_stacktrace:
      words = line.split()
      bucket = bucket_set.get(int(words[BUCKET_ID]))
      component_match = policy.find(bucket)
      if component_match == component_name:
        stacktrace_sequence = ''
        if bucket.typeinfo:
          stacktrace_sequence += '(type=%s)' % bucket.symbolized_typeinfo
          stacktrace_sequence += ' (type.name=%s) ' % bucket.typeinfo_name
        for function, sourcefile in zip(
            bucket.symbolized_stackfunction[
                0 : min(len(bucket.symbolized_stackfunction), 1 + depth)],
            bucket.symbolized_stacksourcefile[
                0 : min(len(bucket.symbolized_stacksourcefile), 1 + depth)]):
          stacktrace_sequence += '%s(@%s) ' % (function, sourcefile)
        if not stacktrace_sequence in sizes:
          sizes[stacktrace_sequence] = 0
        sizes[stacktrace_sequence] += int(words[COMMITTED])


class PProfCommand(Command):
  def __init__(self):
    super(PProfCommand, self).__init__(
        'Usage: %prog pprof [-c COMPONENT] <dump> <policy>')
    self._parser.add_option('-c', '--component', type='string',
                            dest='component',
                            help='restrict to COMPONENT', metavar='COMPONENT')

  def do(self, sys_argv):
    options, args = self._parse_args(sys_argv, 2)
    dump_path = args[1]
    target_policy = args[2]
    component = options.component

    (bucket_set, dump) = Command.load_basic_files(dump_path, False)
    policy_set = PolicySet.load(Command._parse_policy_list(target_policy))

    with open(Command._find_prefix(dump_path) + '.maps', 'r') as maps_f:
      maps_lines = maps_f.readlines()
    PProfCommand._output(
        dump, policy_set[target_policy], bucket_set, maps_lines, component,
        sys.stdout)
    return 0

  @staticmethod
  def _output(dump, policy, bucket_set, maps_lines, component_name, out):
    """Converts the heap profile dump so it can be processed by pprof.

    Args:
        dump: A Dump object.
        policy: A Policy object.
        bucket_set: A BucketSet object.
        maps_lines: A list of strings containing /proc/.../maps.
        component_name: A name of component for filtering.
        out: An IO object to output.
    """
    out.write('heap profile: ')
    com_committed, com_allocs = PProfCommand._accumulate(
        dump, policy, bucket_set, component_name)

    out.write('%6d: %8s [%6d: %8s] @ heapprofile\n' % (
        com_allocs, com_committed, com_allocs, com_committed))

    PProfCommand._output_stacktrace_lines(
        dump, policy, bucket_set, component_name, out)

    out.write('MAPPED_LIBRARIES:\n')
    for line in maps_lines:
      out.write(line)
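
  # A sketch of the pprof-style text this produces (counts, sizes and addresses
  # are made up).  The header and each stacktrace line repeat the allocation
  # count and committed size, followed by '@' and the stack addresses; the raw
  # /proc/<pid>/maps lines follow under MAPPED_LIBRARIES:.
  #
  #   heap profile:      6:     4096 [     6:     4096] @ heapprofile
  #        2:     1024 [     2:     1024] @ 0x00007f0000000010 0x00007f0000000020
  #   MAPPED_LIBRARIES:
  #   7f0000000000-7f0000001000 r-xp 00000000 08:01 1234 /lib/libfoo.so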

  @staticmethod
  def _accumulate(dump, policy, bucket_set, component_name):
    """Accumulates size of committed chunks and the number of allocated chunks.

    Args:
        dump: A Dump object.
        policy: A Policy object.
        bucket_set: A BucketSet object.
        component_name: A name of component for filtering.

    Returns:
        Two integers which are the accumulated size of committed regions and
        the number of allocated chunks, respectively.
    """
    com_committed = 0
    com_allocs = 0
    for line in dump.iter_stacktrace:
      words = line.split()
      bucket = bucket_set.get(int(words[BUCKET_ID]))
      if (not bucket or
          (component_name and component_name != policy.find(bucket))):
        continue

      com_committed += int(words[COMMITTED])
      com_allocs += int(words[ALLOC_COUNT]) - int(words[FREE_COUNT])

    return com_committed, com_allocs

  @staticmethod
  def _output_stacktrace_lines(dump, policy, bucket_set, component_name, out):
    """Prints information of stacktrace lines for pprof.

    Args:
        dump: A Dump object.
        policy: A Policy object.
        bucket_set: A BucketSet object.
        component_name: A name of component for filtering.
        out: An IO object to output.
    """
    for line in dump.iter_stacktrace:
      words = line.split()
      bucket = bucket_set.get(int(words[BUCKET_ID]))
      if (not bucket or
          (component_name and component_name != policy.find(bucket))):
        continue

      out.write('%6d: %8s [%6d: %8s] @' % (
          int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
          words[COMMITTED],
          int(words[ALLOC_COUNT]) - int(words[FREE_COUNT]),
          words[COMMITTED]))
      for address in bucket.stacktrace:
        out.write(' 0x%016x' % address)
      out.write('\n')


class UploadCommand(Command):
  def __init__(self):
    super(UploadCommand, self).__init__(
        'Usage: %prog upload [--gsutil path/to/gsutil] '
        '<first-dump> <destination-gs-path>')
    self._parser.add_option('--gsutil', default='gsutil',
                            help='path to GSUTIL', metavar='GSUTIL')

  def do(self, sys_argv):
    options, args = self._parse_args(sys_argv, 2)
    dump_path = args[1]
    gs_path = args[2]

    dump_files = Command._find_all_dumps(dump_path)
    bucket_files = Command._find_all_buckets(dump_path)
    prefix = Command._find_prefix(dump_path)
    symbol_data_sources = SymbolDataSources(prefix)
    symbol_data_sources.prepare()
    symbol_path = symbol_data_sources.path()

    handle_zip, filename_zip = tempfile.mkstemp('.zip', 'dmprof')
    os.close(handle_zip)

    try:
      file_zip = zipfile.ZipFile(filename_zip, 'w', zipfile.ZIP_DEFLATED)
      for filename in dump_files:
        file_zip.write(filename, os.path.basename(os.path.abspath(filename)))
      for filename in bucket_files:
        file_zip.write(filename, os.path.basename(os.path.abspath(filename)))

      symbol_basename = os.path.basename(os.path.abspath(symbol_path))
      for filename in os.listdir(symbol_path):
        if not filename.startswith('.'):
          file_zip.write(os.path.join(symbol_path, filename),
                         os.path.join(symbol_basename, os.path.basename(
                             os.path.abspath(filename))))
      file_zip.close()

      returncode = UploadCommand._run_gsutil(
          options.gsutil, 'cp', '-a', 'public-read', filename_zip, gs_path)
    finally:
      os.remove(filename_zip)

    return returncode

  @staticmethod
  def _run_gsutil(gsutil, *args):
    """Run gsutil as a subprocess.

    Args:
        *args: Arguments to pass to gsutil.  The first argument should be an
            operation such as ls, cp or cat.

    Returns:
        The return code from the process.
    """
    command = [gsutil] + list(args)
    LOGGER.info("Running: %s", command)

    try:
      return subprocess.call(command)
    except OSError, e:
      LOGGER.error('Error to run gsutil: %s', e)


def main():
  COMMANDS = {
      'buckets': BucketsCommand,
      'csv': CSVCommand,
      'expand': ExpandCommand,
      'json': JSONCommand,
      'list': ListCommand,
      'map': MapCommand,
      'pprof': PProfCommand,
      'stacktrace': StacktraceCommand,
      'upload': UploadCommand,
  }

  if len(sys.argv) < 2 or (not sys.argv[1] in COMMANDS):
    sys.stderr.write("""Usage: dmprof <command> [options] [<args>]

Commands:
   buckets      Dump a bucket list with resolving symbols
   csv          Classify memory usage in CSV
   expand       Show all stacktraces contained in the specified component
   json         Classify memory usage in JSON
   list         Classify memory usage in simple listing format
   map          Show history of mapped regions
   pprof        Format the profile dump so that it can be processed by pprof
   stacktrace   Convert runtime addresses to symbol names
   upload       Upload dumped files

Quick Usage:
   dmprof buckets <first-dump>
   dmprof csv [-p POLICY] <first-dump>
   dmprof expand <dump> <policy> <component> <depth>
   dmprof json [-p POLICY] <first-dump>
   dmprof list [-p POLICY] <first-dump>
   dmprof map <first-dump> <policy>
   dmprof pprof [-c COMPONENT] <dump> <policy>
   dmprof stacktrace <dump>
   dmprof upload [--gsutil path/to/gsutil] <first-dump> <destination-gs-path>
""")
    sys.exit(1)
  action = sys.argv.pop(1)

  LOGGER.setLevel(logging.DEBUG)
  handler = logging.StreamHandler()
  handler.setLevel(logging.INFO)
  formatter = logging.Formatter('%(message)s')
  handler.setFormatter(formatter)
  LOGGER.addHandler(handler)

  try:
    errorcode = COMMANDS[action]().do(sys.argv)
  except ParsingException, e:
    errorcode = 1
    sys.stderr.write('Exit by parsing error: %s\n' % e)

  return errorcode


if __name__ == '__main__':
  sys.exit(main())