build/android/pylib/symbols/elf_symbolizer.py

   1 # Copyright 2014 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 import collections
   6 import datetime
   7 import logging
   8 import multiprocessing
   9 import os
  10 import posixpath
  11 import Queue
  12 import re
  13 import subprocess
  14 import sys
  15 import threading
  16 import time
  17
  18
  19 # addr2line builds a possibly infinite memory cache that can exhaust
  20 # the computer's memory if allowed to grow for too long. This constant
  21 # controls how many lookups we do before restarting the process. 4000
  22 # gives near peak performance without extreme memory usage.
  23 ADDR2LINE_RECYCLE_LIMIT = 4000
  24
  25
  26 class ELFSymbolizer(object):
  27   """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer.
  28
  29   This class is a frontend for addr2line (part of GNU binutils), designed to
  30   symbolize batches of large numbers of symbols for a given ELF file. It
  31   supports sharding symbolization against many addr2line instances and
  32   pipelining of multiple requests per each instance (in order to hide addr2line
  33   internals and OS pipe latencies).
  34
  35   The interface exhibited by this class is a very simple asynchronous interface,
  36   which is based on the following three methods:
  37   - SymbolizeAsync(): used to request (enqueue) resolution of a given address.
  38   - The |callback| method: used to communicated back the symbol information.
  39   - Join(): called to conclude the batch to gather the last outstanding results.
  40   In essence, before the Join method returns, this class will have issued as
  41   many callbacks as the number of SymbolizeAsync() calls. In this regard, note
  42   that due to multiprocess sharding, callbacks can be delivered out of order.
  43
  44   Some background about addr2line:
  45   - it is invoked passing the elf path in the cmdline, piping the addresses in
  46     its stdin and getting results on its stdout.
  47   - it has pretty large response times for the first requests, but it
  48     works very well in streaming mode once it has been warmed up.
  49   - it doesn't scale by itself (on more cores). However, spawning multiple
  50     instances at the same time on the same file is pretty efficient as they
  51     keep hitting the pagecache and become mostly CPU bound.
  52   - it might hang or crash, mostly for OOM. This class deals with both of these
  53     problems.
  54
  55   Despite the "scary" imports and the multi* words above, (almost) no multi-
  56   threading/processing is involved from the python viewpoint. Concurrency
  57   here is achieved by spawning several addr2line subprocesses and handling their
  58   output pipes asynchronously. Therefore, all the code here (with the exception
  59   of the Queue instance in Addr2Line) should be free from mind-blowing
  60   thread-safety concerns.
  61
  62   The multiprocess sharding works as follows:
  63   The symbolizer tries to use the lowest number of addr2line instances as
  64   possible (with respect of |max_concurrent_jobs|) and enqueue all the requests
  65   in a single addr2line instance. For few symbols (i.e. dozens) sharding isn't
  66   worth the startup cost.
  67   The multiprocess logic kicks in as soon as the queues for the existing
  68   instances grow. Specifically, once all the existing instances reach the
  69   |max_queue_size| bound, a new addr2line instance is kicked in.
  70   In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances
  71   have a backlog of |max_queue_size|), back-pressure is applied on the caller by
  72   blocking the SymbolizeAsync method.
  73
  74   This module has been deliberately designed to be dependency free (w.r.t. of
  75   other modules in this project), to allow easy reuse in external projects.
  76   """
  77
  78   def __init__(self, elf_file_path, addr2line_path, callback, inlines=False,
  79       max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50,
  80       source_root_path=None, strip_base_path=None):
  81     """Args:
  82       elf_file_path: path of the elf file to be symbolized.
  83       addr2line_path: path of the toolchain's addr2line binary.
  84       callback: a callback which will be invoked for each resolved symbol with
  85           the two args (sym_info, callback_arg). The former is an instance of
  86           |ELFSymbolInfo| and contains the symbol information. The latter is an
  87           embedder-provided argument which is passed to SymbolizeAsync().
  88       inlines: when True, the ELFSymbolInfo will contain also the details about
  89           the outer inlining functions. When False, only the innermost function
  90           will be provided.
  91       max_concurrent_jobs: Max number of addr2line instances spawned.
  92           Parallelize responsibly, addr2line is a memory and I/O monster.
  93       max_queue_size: Max number of outstanding requests per addr2line instance.
  94       addr2line_timeout: Max time (in seconds) to wait for a addr2line response.
  95           After the timeout, the instance will be considered hung and respawned.
  96       source_root_path: In some toolchains only the name of the source file is
  97           is output, without any path information; disambiguation searches
  98           through the source directory specified by |source_root_path| argument
  99           for files whose name matches, adding the full path information to the
 100           output. For example, if the toolchain outputs "unicode.cc" and there
 101           is a file called "unicode.cc" located under |source_root_path|/foo,
 102           the tool will replace "unicode.cc" with
 103           "|source_root_path|/foo/unicode.cc". If there are multiple files with
 104           the same name, disambiguation will fail because the tool cannot
 105           determine which of the files was the source of the symbol.
 106       strip_base_path: Rebases the symbols source paths onto |source_root_path|
 107           (i.e replace |strip_base_path| with |source_root_path).
 108     """
 109     assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path
 110     self.elf_file_path = elf_file_path
 111     self.addr2line_path = addr2line_path
 112     self.callback = callback
 113     self.inlines = inlines
 114     self.max_concurrent_jobs = (max_concurrent_jobs or
 115                                 min(multiprocessing.cpu_count(), 4))
 116     self.max_queue_size = max_queue_size
 117     self.addr2line_timeout = addr2line_timeout
 118     self.requests_counter = 0  # For generating monotonic request IDs.
 119     self._a2l_instances = []  # Up to |max_concurrent_jobs| _Addr2Line inst.
 120
 121     # If necessary, create disambiguation lookup table
 122     self.disambiguate = source_root_path is not None
 123     self.disambiguation_table = {}
 124     self.strip_base_path = strip_base_path
 125     if(self.disambiguate):
 126       self.source_root_path = os.path.abspath(source_root_path)
 127       self._CreateDisambiguationTable()
 128
 129     # Create one addr2line instance. More instances will be created on demand
 130     # (up to |max_concurrent_jobs|) depending on the rate of the requests.
 131     self._CreateNewA2LInstance()
 132
 133   def SymbolizeAsync(self, addr, callback_arg=None):
 134     """Requests symbolization of a given address.
 135
 136     This method is not guaranteed to return immediately. It generally does, but
 137     in some scenarios (e.g. all addr2line instances have full queues) it can
 138     block to create back-pressure.
 139
 140     Args:
 141       addr: address to symbolize.
 142       callback_arg: optional argument which will be passed to the |callback|."""
 143     assert(isinstance(addr, int))
 144
 145     # Process all the symbols that have been resolved in the meanwhile.
 146     # Essentially, this drains all the addr2line(s) out queues.
 147     for a2l_to_purge in self._a2l_instances:
 148       a2l_to_purge.ProcessAllResolvedSymbolsInQueue()
 149       a2l_to_purge.RecycleIfNecessary()
 150
 151     # Find the best instance according to this logic:
 152     # 1. Find an existing instance with the shortest queue.
 153     # 2. If all of instances' queues are full, but there is room in the pool,
 154     #    (i.e. < |max_concurrent_jobs|) create a new instance.
 155     # 3. If there were already |max_concurrent_jobs| instances and all of them
 156     #    had full queues, make back-pressure.
 157
 158     # 1.
 159     def _SortByQueueSizeAndReqID(a2l):
 160       return (a2l.queue_size, a2l.first_request_id)
 161     a2l = min(self._a2l_instances, key=_SortByQueueSizeAndReqID)
 162
 163     # 2.
 164     if (a2l.queue_size >= self.max_queue_size and
 165         len(self._a2l_instances) < self.max_concurrent_jobs):
 166       a2l = self._CreateNewA2LInstance()
 167
 168     # 3.
 169     if a2l.queue_size >= self.max_queue_size:
 170       a2l.WaitForNextSymbolInQueue()
 171
 172     a2l.EnqueueRequest(addr, callback_arg)
 173
 174   def Join(self):
 175     """Waits for all the outstanding requests to complete and terminates."""
 176     for a2l in self._a2l_instances:
 177       a2l.WaitForIdle()
 178       a2l.Terminate()
 179
 180   def _CreateNewA2LInstance(self):
 181     assert(len(self._a2l_instances) < self.max_concurrent_jobs)
 182     a2l = ELFSymbolizer.Addr2Line(self)
 183     self._a2l_instances.append(a2l)
 184     return a2l
 185
 186   def _CreateDisambiguationTable(self):
 187     """ Non-unique file names will result in None entries"""
 188     start_time = time.time()
 189     logging.info('Collecting information about available source files...')
 190     self.disambiguation_table = {}
 191
 192     for root, _, filenames in os.walk(self.source_root_path):
 193       for f in filenames:
 194         self.disambiguation_table[f] = os.path.join(root, f) if (f not in
 195                                        self.disambiguation_table) else None
 196     logging.info('Finished collecting information about '
 197                  'possible files (took %.1f s).',
 198                  (time.time() - start_time))
 199
 200
 201   class Addr2Line(object):
 202     """A python wrapper around an addr2line instance.
 203
 204     The communication with the addr2line process looks as follows:
 205       [STDIN]         [STDOUT]  (from addr2line's viewpoint)
 206     > f001111
 207     > f002222
 208                     < Symbol::Name(foo, bar) for f001111
 209                     < /path/to/source/file.c:line_number
 210     > f003333
 211                     < Symbol::Name2() for f002222
 212                     < /path/to/source/file.c:line_number
 213                     < Symbol::Name3() for f003333
 214                     < /path/to/source/file.c:line_number
 215     """
 216
 217     SYM_ADDR_RE = re.compile(r'([^:]+):(\?|\d+).*')
 218
 219     def __init__(self, symbolizer):
 220       self._symbolizer = symbolizer
 221       self._lib_file_name = posixpath.basename(symbolizer.elf_file_path)
 222
 223       # The request queue (i.e. addresses pushed to addr2line's stdin and not
 224       # yet retrieved on stdout)
 225       self._request_queue = collections.deque()
 226
 227       # This is essentially len(self._request_queue). It has been optimized to a
 228       # separate field because turned out to be a perf hot-spot.
 229       self.queue_size = 0
 230
 231       # Keep track of the number of symbols a process has processed to
 232       # avoid a single process growing too big and using all the memory.
 233       self._processed_symbols_count = 0
 234
 235       # Objects required to handle the addr2line subprocess.
 236       self._proc = None  # Subprocess.Popen(...) instance.
 237       self._thread = None  # Threading.thread instance.
 238       self._out_queue = None  # Queue.Queue instance (for buffering a2l stdout).
 239       self._RestartAddr2LineProcess()
 240
 241     def EnqueueRequest(self, addr, callback_arg):
 242       """Pushes an address to addr2line's stdin (and keeps track of it)."""
 243       self._symbolizer.requests_counter += 1  # For global "age" of requests.
 244       req_idx = self._symbolizer.requests_counter
 245       self._request_queue.append((addr, callback_arg, req_idx))
 246       self.queue_size += 1
 247       self._WriteToA2lStdin(addr)
 248
 249     def WaitForIdle(self):
 250       """Waits until all the pending requests have been symbolized."""
 251       while self.queue_size > 0:
 252         self.WaitForNextSymbolInQueue()
 253
 254     def WaitForNextSymbolInQueue(self):
 255       """Waits for the next pending request to be symbolized."""
 256       if not self.queue_size:
 257         return
 258
 259       # This outer loop guards against a2l hanging (detecting stdout timeout).
 260       while True:
 261         start_time = datetime.datetime.now()
 262         timeout = datetime.timedelta(seconds=self._symbolizer.addr2line_timeout)
 263
 264         # The inner loop guards against a2l crashing (checking if it exited).
 265         while (datetime.datetime.now() - start_time < timeout):
 266           # poll() returns !None if the process exited. a2l should never exit.
 267           if self._proc.poll():
 268             logging.warning('addr2line crashed, respawning (lib: %s).' %
 269                             self._lib_file_name)
 270             self._RestartAddr2LineProcess()
 271             # TODO(primiano): the best thing to do in this case would be
 272             # shrinking the pool size as, very likely, addr2line is crashed
 273             # due to low memory (and the respawned one will die again soon).
 274
 275           try:
 276             lines = self._out_queue.get(block=True, timeout=0.25)
 277           except Queue.Empty:
 278             # On timeout (1/4 s.) repeat the inner loop and check if either the
 279             # addr2line process did crash or we waited its output for too long.
 280             continue
 281
 282           # In nominal conditions, we get straight to this point.
 283           self._ProcessSymbolOutput(lines)
 284           return
 285
 286         # If this point is reached, we waited more than |addr2line_timeout|.
 287         logging.warning('Hung addr2line process, respawning (lib: %s).' %
 288                         self._lib_file_name)
 289         self._RestartAddr2LineProcess()
 290
 291     def ProcessAllResolvedSymbolsInQueue(self):
 292       """Consumes all the addr2line output lines produced (without blocking)."""
 293       if not self.queue_size:
 294         return
 295       while True:
 296         try:
 297           lines = self._out_queue.get_nowait()
 298         except Queue.Empty:
 299           break
 300         self._ProcessSymbolOutput(lines)
 301
 302     def RecycleIfNecessary(self):
 303       """Restarts the process if it has been used for too long.
 304
 305       A long running addr2line process will consume excessive amounts
 306       of memory without any gain in performance."""
 307       if self._processed_symbols_count >= ADDR2LINE_RECYCLE_LIMIT:
 308         self._RestartAddr2LineProcess()
 309
 310
 311     def Terminate(self):
 312       """Kills the underlying addr2line process.
 313
 314       The poller |_thread| will terminate as well due to the broken pipe."""
 315       try:
 316         self._proc.kill()
 317         self._proc.communicate()  # Essentially wait() without risking deadlock.
 318       except Exception:  # An exception while terminating? How interesting.
 319         pass
 320       self._proc = None
 321
 322     def _WriteToA2lStdin(self, addr):
 323       self._proc.stdin.write('%s\n' % hex(addr))
 324       if self._symbolizer.inlines:
 325         # In the case of inlines we output an extra blank line, which causes
 326         # addr2line to emit a (??,??:0) tuple that we use as a boundary marker.
 327         self._proc.stdin.write('\n')
 328       self._proc.stdin.flush()
 329
 330     def _ProcessSymbolOutput(self, lines):
 331       """Parses an addr2line symbol output and triggers the client callback."""
 332       (_, callback_arg, _) = self._request_queue.popleft()
 333       self.queue_size -= 1
 334
 335       innermost_sym_info = None
 336       sym_info = None
 337       for (line1, line2) in lines:
 338         prev_sym_info = sym_info
 339         name = line1 if not line1.startswith('?') else None
 340         source_path = None
 341         source_line = None
 342         m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2)
 343         if m:
 344           if not m.group(1).startswith('?'):
 345             source_path = m.group(1)
 346             if not m.group(2).startswith('?'):
 347               source_line = int(m.group(2))
 348         else:
 349           logging.warning('Got invalid symbol path from addr2line: %s' % line2)
 350
 351         # In case disambiguation is on, and needed
 352         was_ambiguous = False
 353         disambiguated = False
 354         if self._symbolizer.disambiguate:
 355           if source_path and not posixpath.isabs(source_path):
 356             path = self._symbolizer.disambiguation_table.get(source_path)
 357             was_ambiguous = True
 358             disambiguated = path is not None
 359             source_path = path if disambiguated else source_path
 360
 361           # Use absolute paths (so that paths are consistent, as disambiguation
 362           # uses absolute paths)
 363           if source_path and not was_ambiguous:
 364             source_path = os.path.abspath(source_path)
 365
 366         if source_path and self._symbolizer.strip_base_path:
 367           # Strip the base path
 368           source_path = re.sub('^' + self._symbolizer.strip_base_path,
 369               self._symbolizer.source_root_path or '', source_path)
 370
 371         sym_info = ELFSymbolInfo(name, source_path, source_line, was_ambiguous,
 372                                  disambiguated)
 373         if prev_sym_info:
 374           prev_sym_info.inlined_by = sym_info
 375         if not innermost_sym_info:
 376           innermost_sym_info = sym_info
 377
 378       self._processed_symbols_count += 1
 379       self._symbolizer.callback(innermost_sym_info, callback_arg)
 380
 381     def _RestartAddr2LineProcess(self):
 382       if self._proc:
 383         self.Terminate()
 384
 385       # The only reason of existence of this Queue (and the corresponding
 386       # Thread below) is the lack of a subprocess.stdout.poll_avail_lines().
 387       # Essentially this is a pipe able to extract a couple of lines atomically.
 388       self._out_queue = Queue.Queue()
 389
 390       # Start the underlying addr2line process in line buffered mode.
 391
 392       cmd = [self._symbolizer.addr2line_path, '--functions', '--demangle',
 393           '--exe=' + self._symbolizer.elf_file_path]
 394       if self._symbolizer.inlines:
 395         cmd += ['--inlines']
 396       self._proc = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE,
 397           stdin=subprocess.PIPE, stderr=sys.stderr, close_fds=True)
 398
 399       # Start the poller thread, which simply moves atomically the lines read
 400       # from the addr2line's stdout to the |_out_queue|.
 401       self._thread = threading.Thread(
 402           target=ELFSymbolizer.Addr2Line.StdoutReaderThread,
 403           args=(self._proc.stdout, self._out_queue, self._symbolizer.inlines))
 404       self._thread.daemon = True  # Don't prevent early process exit.
 405       self._thread.start()
 406
 407       self._processed_symbols_count = 0
 408
 409       # Replay the pending requests on the new process (only for the case
 410       # of a hung addr2line timing out during the game).
 411       for (addr, _, _) in self._request_queue:
 412         self._WriteToA2lStdin(addr)
 413
 414     @staticmethod
 415     def StdoutReaderThread(process_pipe, queue, inlines):
 416       """The poller thread fn, which moves the addr2line stdout to the |queue|.
 417
 418       This is the only piece of code not running on the main thread. It merely
 419       writes to a Queue, which is thread-safe. In the case of inlines, it
 420       detects the ??,??:0 marker and sends the lines atomically, such that the
 421       main thread always receives all the lines corresponding to one symbol in
 422       one shot."""
 423       try:
 424         lines_for_one_symbol = []
 425         while True:
 426           line1 = process_pipe.readline().rstrip('\r\n')
 427           line2 = process_pipe.readline().rstrip('\r\n')
 428           if not line1 or not line2:
 429             break
 430           inline_has_more_lines = inlines and (len(lines_for_one_symbol) == 0 or
 431                                   (line1 != '??' and line2 != '??:0'))
 432           if not inlines or inline_has_more_lines:
 433             lines_for_one_symbol += [(line1, line2)]
 434           if inline_has_more_lines:
 435             continue
 436           queue.put(lines_for_one_symbol)
 437           lines_for_one_symbol = []
 438         process_pipe.close()
 439
 440       # Every addr2line processes will die at some point, please die silently.
 441       except (IOError, OSError):
 442         pass
 443
 444     @property
 445     def first_request_id(self):
 446       """Returns the request_id of the oldest pending request in the queue."""
 447       return self._request_queue[0][2] if self._request_queue else 0
 448
 449
 450 class ELFSymbolInfo(object):
 451   """The result of the symbolization passed as first arg. of each callback."""
 452
 453   def __init__(self, name, source_path, source_line, was_ambiguous=False,
 454                disambiguated=False):
 455     """All the fields here can be None (if addr2line replies with '??')."""
 456     self.name = name
 457     self.source_path = source_path
 458     self.source_line = source_line
 459     # In the case of |inlines|=True, the |inlined_by| points to the outer
 460     # function inlining the current one (and so on, to form a chain).
 461     self.inlined_by = None
 462     self.disambiguated = disambiguated
 463     self.was_ambiguous = was_ambiguous
 464
 465   def __str__(self):
 466     return '%s [%s:%d]' % (
 467         self.name or '??', self.source_path or '??', self.source_line or 0)