1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
12 from lib
.dump
import Dump
13 from lib
.exceptions
import EmptyDumpException
, InvalidDumpException
14 from lib
.exceptions
import ObsoleteDumpVersionException
, ParsingException
15 from lib
.pageframe
import PageFrame
16 from lib
.range_dict
import ExclusiveRangeDict
17 from lib
.symbol
import procfs
# Module-wide logger shared by the dump-parsing code below.
LOGGER = logging.getLogger('dmprof')

# Column indexes of a stacktrace line in a heap profile dump (see
# iter_stacktrace, which reads words[COMMITTED] .. words[BUCKET_ID]).
VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)
# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinct mmap regions and malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
# NOTE(review): this is the interior of a class whose "class ...:" header is
# outside this chunk (the super() call in __init__ below names DeepDump).
"""Represents a heap profile dump."""

# Matches dump file paths of the form "<prefix>.<pid>.<count>.heap".
_PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')

# Matches one mmap-list region line:
# "(start)-(end) hooked|unhooked <additional info>".
_HOOK_PATTERN = re.compile(
    r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
    r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

# Additional-info formats for hooked/unhooked regions (current format).
_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                             '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               '(?P<RESERVED>[0-9]+)')

# Additional-info formats used by older dumps.
_OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
_OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')

# "Time:" meta lines: formatted datetime (with optional milliseconds),
# or plain seconds.
_TIME_PATTERN_FORMAT = re.compile(
    r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
_TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
def __init__(self, path, modified_time):
  """Initializes a dump from |path|; contents are parsed later by load_file.

  Args:
      path: A dump file path string; must match _PATH_PATTERN.
      modified_time: Last-modification time of the dump file.
  """
  # NOTE(review): a few attribute initializations from the original file
  # are elided from this chunk (numbering gaps at orig. 71, 76, 80-82).
  super(DeepDump, self).__init__()
  matched = self._PATH_PATTERN.match(path)
  self._pid = int(matched.group(2))     # second path component: process id
  self._count = int(matched.group(3))   # third path component: dump count
  self._time = modified_time
  self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
  self._stacktrace_lines = []
  self._global_stats = {}  # used only in apply_policy
  # Pageframe meta information; overwritten by _parse_meta_information.
  self._pageframe_length = 0
  self._pageframe_encoding = ''
  self._has_pagecount = False
# NOTE(review): body of an iterator method whose "def" line is elided from
# this chunk; yields (range-key, value) pairs from self._map in sorted
# order (Python 2 dict.iteritems).
for region in sorted(self._map.iteritems()):
  yield region[0], region[1]
def iter_stacktrace(self):
  """Yields numeric fields of each stored stacktrace line."""
  for line in self._stacktrace_lines:
    # NOTE(review): the line defining |words| (presumably splitting |line|)
    # is elided from this chunk -- confirm against the full file.
    yield (int(words[BUCKET_ID]),
           int(words[COMMITTED]),
           int(words[ALLOC_COUNT]),
           int(words[FREE_COUNT]))
def global_stat(self, name):
  """Returns the global stat value recorded under |name|.

  Args:
      name: A key string of self._global_stats (filled by
          _parse_global_stats).
  """
  stats = self._global_stats
  return stats[name]
# NOTE(review): body of an accessor (its "def" line -- probably a
# @property for the page size -- is elided from this chunk).
return self._pagesize
def pageframe_length(self):
  """Accessor for the pageframe length recorded from META information."""
  recorded_length = self._pageframe_length
  return recorded_length
def pageframe_encoding(self):
  """Accessor for the pageframe encoding ('' or 'base64') from META info."""
  recorded_encoding = self._pageframe_encoding
  return recorded_encoding
def has_pagecount(self):
  """Accessor for whether META information declared PageCount."""
  pagecount_flag = self._has_pagecount
  return pagecount_flag
def load(path, log_header='Loading a heap profile dump: '):
  """Loads a heap profile dump.

  Args:
      path: A file path string to load.
      log_header: A preceding string for log messages.

  Returns:
      A loaded Dump object.

  Raises:
      ParsingException for invalid heap profile dumps.
  """
  # NOTE(review): probably a @staticmethod; its decorator and the trailing
  # "return dump" are elided from this chunk.
  dump = Dump(path, os.stat(path).st_mtime)
  with open(path, 'r') as f:
    dump.load_file(f, log_header)
def load_file(self, f, log_header):
  """Parses a heap profile dump read from file object |f| into this object.

  Args:
      f: An opened file object of a dump file.
      log_header: A preceding string for log messages.
  """
  # Keep only lines that are non-empty and not '#' comments.
  self._lines = [line for line in f
                 if line and not line.startswith('#')]
  # NOTE(review): the enclosing "try:" for the except clauses below (and
  # any re-raise/else handling) is elided from this chunk.
  self._version, ln = self._parse_version()
  self._parse_meta_information()
  # The mmap list section exists only from version DUMP_DEEP_6 on.
  if self._version == DUMP_DEEP_6:
    self._parse_mmap_list()
  self._parse_global_stats()
  self._extract_stacktrace_lines(ln)
  except EmptyDumpException:
    LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
  except ParsingException, e:
    LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
  LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))
def _parse_version(self):
  """Parses a version string in self._lines.

  Returns:
      A pair of (a string representing a version of the stacktrace dump,
      and an integer indicating a line number next to the version string).

  Raises:
      ParsingException for invalid dump versions.
  """
  # NOTE(review): several guard lines (the empty-dump check before the
  # EmptyDumpException raise, "if not found" before the
  # InvalidDumpException raise, an "else:" before the final version raise,
  # and the return statement) are elided from this chunk.

  # Skip until an identifiable line.
  headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
  raise EmptyDumpException('Empty heap dump file.')
  (ln, found) = skip_while(
      lambda n: not self._lines[n].startswith(headers))
  raise InvalidDumpException('No version header.')

  # Identify a version.
  if self._lines[ln].startswith('heap profile: '):
    version = self._lines[ln][13:].strip()
    if version in (DUMP_DEEP_5, DUMP_DEEP_6):
      # Advance to the STACKTRACES section for the supported versions.
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n] != 'STACKTRACES:\n')
    elif version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(version)
    raise InvalidDumpException('Invalid version: %s' % version)
  elif self._lines[ln] == 'STACKTRACES:\n':
    # A bare STACKTRACES header means the oldest format.
    raise ObsoleteDumpVersionException(DUMP_DEEP_1)
  elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
    raise ObsoleteDumpVersionException(DUMP_DEEP_2)
def _parse_global_stats(self):
  """Parses lines in self._lines as global stats."""
  # NOTE(review): the first skip_while call's leading arguments and the
  # line-advance ("ln += 1") statements are elided from this chunk.
  (ln, _) = skip_while(
      lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

  global_stat_names = [
      'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
      'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
      'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
      'nonprofiled-stack', 'nonprofiled-other',
      'profiled-mmap', 'profiled-malloc']

  for prefix in global_stat_names:
    # Find the stat line whose first word is |prefix| and record its last
    # two columns as the virtual/committed sizes.
    (ln, _) = skip_while(
        ln, len(self._lines),
        lambda n: self._lines[n].split()[0] != prefix)
    words = self._lines[ln].split()
    self._global_stats[prefix + '_virtual'] = int(words[-2])
    self._global_stats[prefix + '_committed'] = int(words[-1])
def _parse_meta_information(self):
  """Parses lines in self._lines for meta information."""
  # NOTE(review): several guard/loop lines are elided from this chunk
  # (e.g. the "if matched_format:" guard, the CommandLine branch body, the
  # "for word in words:" loop header with its first condition, "else:"
  # branches, line advances and the loop exit); indentation below is
  # therefore approximate.
  (ln, found) = skip_while(
      lambda n: self._lines[n] != 'META:\n')

  if self._lines[ln].startswith('Time:'):
    # Try both recognized time formats; see the two class patterns.
    matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
    matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
    self._time = time.mktime(datetime.datetime.strptime(
        matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
    if matched_format.group(2):
      # Optional fractional part; added as milliseconds.
      self._time += float(matched_format.group(2)[1:]) / 1000.0
  elif matched_seconds:
    self._time = float(matched_seconds.group(1))
  elif self._lines[ln].startswith('Reason:'):
    pass  # Nothing to do for 'Reason:'
  elif self._lines[ln].startswith('PageSize: '):
    self._pagesize = int(self._lines[ln][10:])
  elif self._lines[ln].startswith('CommandLine:'):
  elif (self._lines[ln].startswith('PageFrame: ') or
        self._lines[ln].startswith('PFN: ')):
    # Both spellings carry a comma-separated option list.
    if self._lines[ln].startswith('PageFrame: '):
      words = self._lines[ln][11:].split(',')
    words = self._lines[ln][5:].split(',')
    self._pageframe_length = 24
  elif word == 'Base64':
    self._pageframe_encoding = 'base64'
  elif word == 'PageCount':
    self._has_pagecount = True
  elif self._lines[ln].startswith('RunID: '):
    self._run_id = self._lines[ln][7:].strip()
  elif (self._lines[ln].startswith('MMAP_LIST:') or
        self._lines[ln].startswith('GLOBAL_STATS:')):
    # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found.
def _parse_mmap_list(self):
  """Parses lines in self._lines as a mmap list."""
  # NOTE(review): loop headers, "if not found"/"else:" branches and line
  # advances are elided from this chunk; indentation below is approximate.
  (ln, found) = skip_while(
      lambda n: self._lines[n] != 'MMAP_LIST:\n')

  # Parse a /proc/maps-formatted line and merge its fields into the
  # overlapping ranges of self._procmaps.
  entry = procfs.ProcMaps.parse_line(self._lines[ln])
  for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
    for key, value in entry.as_dict().iteritems():
      current_vma[key] = value

  # Collect pageframes attached to the region ("PF:" continuation lines).
  if self._lines[ln].startswith(' PF: '):
    for pageframe in self._lines[ln][5:].split():
      pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))

  matched = self._HOOK_PATTERN.match(self._lines[ln])
  # 2: starting address
  # 7: hooked or unhooked
  # 8: additional information
  if matched.group(7) == 'hooked':
    submatched = self._HOOKED_PATTERN.match(matched.group(8))
    # NOTE(review): the fallback guard (presumably "if not submatched:")
    # before the old-format match is elided from this chunk.
    submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
  elif matched.group(7) == 'unhooked':
    submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
    submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
  assert matched.group(7) in ['hooked', 'unhooked']

  submatched_dict = submatched.groupdict()
  region_info = { 'vma': current_vma }
  # Copy over only the additional-info fields that actually matched.
  if submatched_dict.get('TYPE'):
    region_info['type'] = submatched_dict['TYPE'].strip()
  if submatched_dict.get('COMMITTED'):
    region_info['committed'] = int(submatched_dict['COMMITTED'])
  if submatched_dict.get('RESERVED'):
    region_info['reserved'] = int(submatched_dict['RESERVED'])
  if submatched_dict.get('BUCKETID'):
    region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

  # A parenthesized bound takes the address from the current VMA instead
  # of the hex field.  (The "else:" lines are elided from this chunk.)
  if matched.group(1) == '(':
    start = current_vma['begin']
  start = int(matched.group(2), 16)
  if matched.group(4) == '(':
    end = current_vma['end']
  end = int(matched.group(5), 16)

  # Trim partial first/last pageframes down to the region boundaries.
  if pageframe_list and pageframe_list[0].start_truncated:
    pageframe_list[0].set_size(
        pageframe_list[0].size - start % self._pagesize)
  if pageframe_list and pageframe_list[-1].end_truncated:
    pageframe_list[-1].set_size(
        pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
  region_info['pageframe'] = pageframe_list

  self._map[(start, end)] = (matched.group(7), region_info)
def _extract_stacktrace_lines(self, line_number):
  """Extracts the position of stacktrace lines.

  Valid stacktrace lines are stored into self._stacktrace_lines.

  Args:
      line_number: A line number to start parsing in lines.

  Raises:
      ParsingException for invalid dump versions.
  """
  # NOTE(review): the "else:" before the final raise is elided from this
  # chunk.
  if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
    # Skip to the first line whose first word is numeric ...
    (line_number, _) = skip_while(
        line_number, len(self._lines),
        lambda n: not self._lines[n].split()[0].isdigit())
    stacktrace_start = line_number
    # ... then take lines while they pass _check_stacktrace_line.
    (line_number, _) = skip_while(
        line_number, len(self._lines),
        lambda n: self._check_stacktrace_line(self._lines[n]))
    self._stacktrace_lines = self._lines[stacktrace_start:line_number]
  elif self._version in DUMP_DEEP_OBSOLETE:
    raise ObsoleteDumpVersionException(self._version)
  raise InvalidDumpException('Invalid version: %s' % self._version)
def _check_stacktrace_line(stacktrace_line):
  """Checks if a given stacktrace_line is valid as stacktrace.

  Args:
      stacktrace_line: A string to be checked.

  Returns:
      True if the given stacktrace_line is valid.
  """
  # NOTE(review): likely a @staticmethod; its decorator and the return
  # statements under the guards below are elided from this chunk.
  words = stacktrace_line.split()
  if len(words) < BUCKET_ID + 1:
  if words[BUCKET_ID - 1] != '@':
class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  # NOTE(review): the ProcMapsEntry(...) argument list and several method
  # "def" lines (__init__, __str__, __repr__, the copy method, and the
  # consequence of the guard in __setitem__ -- presumably a raise) are
  # elided from this chunk.
  _DUMMY_ENTRY = procfs.ProcMapsEntry(

    # __init__ body: start from a copy of the dummy entry's fields.
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

    # __str__ body.
    return str(self._entry)

    # __repr__ body.
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    # Only keys already present in the entry dict are expected here.
    if key not in self._entry:
    self._entry[key] = value

    # copy-method body: deep-copy every field into a fresh attribute.
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) is False.

  Returns:
      A pair of an integer indicating a line number after skipped, and a
      boolean value which is True if found a line which skipping_condition
      does not hold for.
  """
  # NOTE(review): the loop body (index advance and the bailout return) and
  # the final return continue past the end of this chunk.
  while skipping_condition(index):
    if index >= max_index: