Roll src/third_party/WebKit f298044:aa8346d (svn 202628:202629)
[chromium-blink-merge.git] / tools / symsrc / source_index.py
blobf780cd6bab99fcbc640426d90d2c22f8c2184629
1 #!/usr/bin/env python
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Usage: <win-path-to-pdb.pdb>
7 This tool will take a PDB on the command line, extract the source files that
8 were used in building the PDB, query the source server for which repository
9 and revision these files are at, and then finally write this information back
10 into the PDB in a format that the debugging tools understand. This allows for
11 automatic source debugging, as all of the information is contained in the PDB,
12 and the debugger can go out and fetch the source files.
14 You most likely want to run these immediately after a build, since the source
15 input files need to match the generated PDB, and we want the correct
16 revision information for the exact files that were used for the build.
18 The following files from a windbg + source server installation are expected
19 to reside in the same directory as this python script:
20 dbghelp.dll
21 pdbstr.exe
22 srctool.exe
24 NOTE: Expected to run under a native win32 python, NOT cygwin. All paths are
25 dealt with as win32 paths, since we have to interact with the Microsoft tools.
26 """
28 import os
29 import optparse
30 import sys
31 import tempfile
32 import time
33 import subprocess
34 import win32api
36 from collections import namedtuple
# This serves two purposes. First, it acts as a whitelist, and only files
# from repositories listed here will be source indexed. Second, it allows us
# to map from one URL to another, so we can map to external source servers. It
# also indicates if the source for this project will be retrieved in a base64
# encoded format.
#
# Each key is the URL of a repository. Each value is a dictionary with:
#     - 'url': A template for the public URL of a file of this repository; it
#         contains the '{file_path}' and '{revision}' placeholders.
#     - 'base64': Whether the content served at this URL is base64 encoded.
#
# TODO(sebmarchand): Initialize this variable in the main function and pass it
#     to the sub functions instead of having a global variable.
REPO_MAP = {
  'http://src.chromium.org/svn': {
    'url': 'https://src.chromium.org/chrome/'
        '{file_path}?revision={revision}',
    'base64': False
  },
  'https://src.chromium.org/svn': {
    'url': 'https://src.chromium.org/chrome/'
        '{file_path}?revision={revision}',
    'base64': False
  }
}
# Groups of projects that share the same URL layout. Each group contains:
#     - 'projects': The names of the projects of this group.
#     - 'public_url': A template for the public URL of a file; '%s' is replaced
#         by the project name, the '{revision}' and '{file_path}' placeholders
#         are filled in later, when a file gets indexed.
#     - 'svn_urls': Templates of the repository URLs under which a project of
#         this group can be checked out; '%s' is replaced by the project name.
PROJECT_GROUPS = [
  # Googlecode SVN projects
  {
    'projects': [
      'angleproject',
      'google-breakpad',
      'google-cache-invalidation-api',
      'google-url',
      'googletest',
      'leveldb',
      'libphonenumber',
      'libyuv',
      'open-vcdiff',
      'ots',
      'sawbuck',
      'sfntly',
      'smhasher',
      'v8',
      'v8-i18n',
      'webrtc',
    ],
    'public_url': 'https://%s.googlecode.com/svn-history/' \
        'r{revision}/{file_path}',
    'svn_urls': [
      'svn://svn-mirror.golo.chromium.org/%s',
      'http://src.chromium.org/%s',
      'https://src.chromium.org/%s',
      'http://%s.googlecode.com/svn',
      'https://%s.googlecode.com/svn',
    ],
  },
  # Googlecode Git projects
  {
    'projects': [
      'syzygy',
    ],
    'public_url': 'https://%s.googlecode.com/git-history/' \
        '{revision}/{file_path}',
    'svn_urls': [
      'https://code.google.com/p/%s/',
    ],
  },
  # Chrome projects
  {
    'projects': [
      'blink',
      'chrome',
      'multivm',
      'native_client',
    ],
    'public_url': 'https://src.chromium.org/%s/' \
        '{file_path}?revision={revision}',
    'svn_urls': [
      'svn://chrome-svn/%s',
      'svn://chrome-svn.corp.google.com/%s',
      'svn://svn-mirror.golo.chromium.org/%s',
      'svn://svn.chromium.org/%s',
    ],
  },
]
# A named tuple used to store the information about a repository.
#
# It contains the following members:
#     - repo: The URL of the repository;
#     - rev: The revision (or hash) of the current checkout.
#     - files: The list of files coming from this repository.
#     - root_path: The root path of this checkout.
#     - path_prefix: A prefix to apply to the filename of the files coming from
#         this repository.
RevisionInfo = namedtuple('RevisionInfo',
                          ['repo', 'rev', 'files', 'root_path', 'path_prefix'])
def GetCasedFilePath(filename):
  """Returns the correctly cased version of the path |filename|.

  The path is converted to its short form and then back to its long form
  through win32api.
  """
  short_path = win32api.GetShortPathName(unicode(filename))
  return win32api.GetLongPathName(short_path)
def FillRepositoriesMap():
  """Registers every project listed in PROJECT_GROUPS into REPO_MAP.

  For each project, every expanded svn URL is mapped to the public URL template
  of the project (never base64 encoded). The expanded public URL itself is also
  added as a key, mapped to None.
  """
  for group in PROJECT_GROUPS:
    for name in group['projects']:
      REPO_MAP.update(
          (svn_url % name, {
            'url': group['public_url'] % name,
            'base64': False
          })
          for svn_url in group['svn_urls'])
      REPO_MAP[group['public_url'] % name] = None


FillRepositoriesMap()
def FindFile(filename):
  """Returns the absolute path of |filename|, taken to live next to this code.

  The file is not required to exist; only the path is computed.
  """
  script_path = os.path.join(os.path.curdir, __file__)
  script_dir = os.path.dirname(script_path)
  return os.path.abspath(os.path.join(script_dir, filename))
def RunCommand(*cmd, **kwargs):
  """Executes a command and returns what it printed to stdout.

  The positional arguments form the command line. The keyword arguments are
  forwarded to subprocess.Popen; stdin, stdout and stderr default to pipes and
  universal_newlines defaults to True.

  kwargs:
    raise_on_failure: Indicates if an exception should be raised on failure, if
        set to false then the function will return None.

  Returns:
    The stdout of the command without its trailing newlines, or None if the
    command failed and |raise_on_failure| is false.

  Raises:
    subprocess.CalledProcessError: if the command exits with a non-zero code
        and |raise_on_failure| is true. The stderr of the command is printed
        before raising.
  """
  raise_on_failure = kwargs.pop('raise_on_failure', True)
  for stream in ('stdin', 'stdout', 'stderr'):
    kwargs.setdefault(stream, subprocess.PIPE)
  kwargs.setdefault('universal_newlines', True)

  process = subprocess.Popen(cmd, **kwargs)
  stdout_data, stderr_data = process.communicate()
  if process.returncode == 0:
    return (stdout_data or '').rstrip('\n')

  if not raise_on_failure:
    return None
  print('Error: %s' % stderr_data)
  raise subprocess.CalledProcessError(process.returncode, cmd)
def ExtractSourceFiles(pdb_filename):
  """Returns the set of local source file paths referenced by a PDB.

  The paths are produced by running 'srctool.exe -r' on the PDB and are
  lower-cased.

  Raises:
    Exception: if srctool prints nothing or prints a 'srctool: ' message.
  """
  srctool_output = RunCommand(FindFile('srctool.exe'), '-r', pdb_filename)
  if srctool_output and not srctool_output.startswith("srctool: "):
    return set(path.lower() for path in srctool_output.split('\n') if path)
  raise Exception("srctool failed: " + srctool_output)
def ReadSourceStream(pdb_filename):
  """Returns the content of the 'srcsrv' stream of a PDB, as read by pdbstr.

  Raises:
    Exception: if pdbstr exits with a code other than 0 or -1, or if its
        output starts with 'pdbstr: '.
  """
  pdbstr_cmd = [
    FindFile('pdbstr.exe'),
    '-r',
    '-s:srcsrv',
    '-p:%s' % pdb_filename,
  ]
  pdbstr = subprocess.Popen(pdbstr_cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
  output = pdbstr.communicate()[0]

  # Both 0 and -1 are accepted as exit codes.
  failed = pdbstr.returncode not in (0, -1) or output.startswith("pdbstr: ")
  if failed:
    raise Exception("pdbstr failed: " + output)
  return output
def WriteSourceStream(pdb_filename, data):
  """Write the contents of the source information stream to a PDB.

  Args:
    pdb_filename: The path of the PDB to update.
    data: The content to store in the 'srcsrv' stream of the PDB.

  Raises:
    Exception: if pdbstr exits with a code other than 0 or -1, or if its
        output starts with 'pdbstr: '.
  """
  # Write out the data to a temporary filename that we can pass to pdbstr.
  (fd, fname) = tempfile.mkstemp()
  try:
    # The file has to be closed before pdbstr reads it.
    with os.fdopen(fd, "wb") as temp_file:
      temp_file.write(data)

    pdbstr = subprocess.Popen([FindFile('pdbstr.exe'),
                               '-w', '-s:srcsrv',
                               '-i:%s' % fname,
                               '-p:%s' % pdb_filename],
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, _ = pdbstr.communicate()

    if ((pdbstr.returncode != 0 and pdbstr.returncode != -1) or
        output.startswith("pdbstr: ")):
      raise Exception("pdbstr failed: " + output)
  finally:
    # mkstemp() leaves the deletion to the caller; do it even when pdbstr
    # fails so that failed runs don't leave temporary files behind.
    os.unlink(fname)
def GetSVNRepoInfo(local_path):
  """Calls svn info to extract the SVN information about a path.

  Args:
    local_path: The local file or directory to query.

  Returns:
    None if 'svn info' fails or prints nothing, otherwise a dictionary mapping
    the name of each field printed by 'svn info' to its value.
  """
  # We call svn.bat to make sure and get the depot tools SVN and not cygwin.
  info = RunCommand('svn.bat', 'info', local_path, raise_on_failure=False)
  if not info:
    return None

  # Hack up into a dictionary of the fields printed by svn info. Only the first
  # ': ' of a line separates the field name from its value (the value itself
  # may contain ': '), and the lines without a separator are ignored.
  vals = {}
  for line in info.split('\n'):
    key, separator, value = line.partition(': ')
    if separator:
      vals[key] = value
  return vals
def ExtractSVNInfo(local_filename):
  """Returns the svn revision information of a file, if it has any.

  Args:
    local_filename: The name of the file that we want to check.

  Returns:
    None if the file doesn't come from a svn repository, otherwise it returns a
    RevisionInfo tuple whose file list only contains this file.

  Raises:
    Exception: if the URL of the file isn't located under the root URL of its
        repository.
  """
  svn_info = GetSVNRepoInfo(local_filename)
  if not svn_info:
    return None

  repo_root = svn_info['Repository Root']
  file_url = svn_info['URL']
  if not file_url.startswith(repo_root):
    raise Exception("URL is not inside of the repository root?!?")
  revision = svn_info['Revision']

  checkout_dir = os.path.split(local_filename)[0]

  # The prefix is derived from the SVN URL of the file itself rather than from
  # the checkout, in order to handle a partial checkout nested inside another
  # checkout of the same repository (Chromium does this with
  # '/trunk/deps/third_party' checked out in 'src/third_party').
  parent_url = os.path.dirname(file_url)

  # Listing all the files of the repository seems to slow down the indexing, so
  # only the requested file is reported.
  relative_name = local_filename.replace(checkout_dir, '').lstrip(os.path.sep)

  return RevisionInfo(repo=repo_root,
                      rev=revision,
                      files=[relative_name],
                      root_path=checkout_dir,
                      path_prefix=parent_url.replace(repo_root, ''))
def ExtractGitInfo(local_filename):
  """Returns the git revision information of a file, if it has any.

  As a side effect, a repository hosted on chromium.googlesource.com that isn't
  in REPO_MAP yet gets added to it.

  Args:
    local_filename: The name of the file that we want to check.

  Returns:
    None if the file doesn't come from a git repository, otherwise it returns a
    RevisionInfo tuple listing all the files of this repository.
  """
  # 'git log' is used to find out whether this file is tracked by git. For this
  # to work the working directory has to be correctly cased: even on Windows
  # the casing of the |cwd| argument of subprocess.Popen matters, and with a
  # wrong casing git reports nothing even for a tracked file. (The casing
  # doesn't matter when only checking whether a directory belongs to a git
  # repository.)
  local_filename = GetCasedFilePath(local_filename)
  file_dir, file_basename = os.path.split(local_filename)

  last_commit = RunCommand('git.bat', 'log', '-n', '1', file_basename,
                           cwd=file_dir, raise_on_failure=False)
  if not last_commit:
    return None

  # The revision that is currently checked out.
  head_rev = RunCommand('git.bat', 'rev-parse', 'HEAD', cwd=file_dir)

  # The URL of the remote repository. When it points to a local directory the
  # same query is repeated from there to get the real URL.
  origin_url_cmd = ('git.bat', 'config', '--get', 'remote.origin.url')
  repo_url = RunCommand(*origin_url_cmd, cwd=file_dir)
  if os.path.isdir(repo_url):
    repo_url = RunCommand(*origin_url_cmd, cwd=repo_url)

  # Don't use the authenticated path.
  repo_url = repo_url.replace('googlesource.com/a/', 'googlesource.com/')

  # The path of this file relative to the root of the git repository.
  tracked_path = RunCommand('git.bat', 'ls-tree', '--full-name',
                            '--name-only', 'HEAD', file_basename,
                            cwd=file_dir)
  tracked_path = tracked_path.replace('/','\\')
  if not tracked_path:
    return None

  checkout_root = local_filename.replace(tracked_path, '')

  # The files of the repositories hosted on chromium.googlesource.com are
  # accessible via gitiles in a base64 encoded format, so these repositories
  # are automatically added to the repository map.
  if repo_url not in REPO_MAP and 'chromium.googlesource.com' in repo_url:
    REPO_MAP[repo_url] = {
      'url': '%s/+/{revision}/{file_path}?format=TEXT' % repo_url,
      'base64': True
    }

  # Collect all the files coming from this repository.
  all_tracked = RunCommand('git.bat', 'ls-tree', '--full-name', '--name-only',
                           'HEAD', '-r', cwd=checkout_root)
  tracked_files = [name for name in all_tracked.splitlines() if name]

  return RevisionInfo(repo=repo_url,
                      rev=head_rev,
                      files=tracked_files,
                      root_path=checkout_root,
                      path_prefix=None)
def IndexFilesFromRepo(local_filename, file_list, output_lines):
  """Checks if a given file is a part of a revision control repository (svn or
  git) and index all the files from this repository if it's the case.

  Every file of the repository that is present in |file_list| is removed from
  it, whether or not the repository is whitelisted in REPO_MAP.

  Args:
    local_filename: The filename of the current file.
    file_list: The list of files that should be indexed.
    output_lines: The source indexing lines that will be appended to the PDB.

  Returns the number of indexed files. This is 0 if the file isn't under
  revision control or if its repository isn't whitelisted.
  """
  # Try to extract the revision info for the current file.
  info = ExtractGitInfo(local_filename) or ExtractSVNInfo(local_filename)
  if not info:
    # Not under git nor svn: nothing to index, the caller decides what to do
    # with this file.
    return 0

  rev = info.rev
  root_path = info.root_path.lower()

  # Checks if we should index this file and if the source that we'll retrieve
  # will be base64 encoded. Some keys of REPO_MAP are mapped to None, those
  # entries carry no URL and can't be used for indexing.
  repo_entry = REPO_MAP.get(info.repo)
  should_index = repo_entry is not None
  base_64 = repo_entry.get('base64') if should_index else False

  indexed_files = 0

  # Iterates over the files from this repo and index them if needed.
  for file_iter in info.files:
    full_file_path = os.path.normpath(
        os.path.join(root_path, file_iter.lower()))
    # Checks if the file is in the list of files to be indexed.
    if full_file_path not in file_list:
      continue

    if should_index:
      current_file = file_iter
      # Prefix the filename with the prefix for this repository if needed.
      if info.path_prefix:
        current_file = os.path.join(info.path_prefix, current_file)
      source_url = repo_entry.get('url').format(revision=rev,
          file_path=os.path.normpath(current_file).replace('\\', '/'))
      output_lines.append('%s*%s*%s*%s*%s' % (full_file_path, current_file,
          rev, source_url, 'base64.b64decode' if base_64 else ''))
      indexed_files += 1
    file_list.remove(full_file_path)

  # The input file should have been removed from the list of files to index.
  if indexed_files and local_filename in file_list:
    print('%s shouldn\'t be in the list of files to index anymore.' % \
        local_filename)
    # TODO(sebmarchand): Turn this into an exception once I've confirmed that
    #     this doesn't happen on the official builder.
    file_list.remove(local_filename)

  return indexed_files
def DirectoryIsUnderPublicVersionControl(local_dir):
  """Returns True if |local_dir| is part of a git or of a svn checkout.

  A directory counts as a git checkout when git reports a 'remote.origin.url'
  for it, and as a svn checkout when 'svn info' succeeds on it. svn is only
  queried when the git check fails.
  """
  git_remote = RunCommand('git.bat', 'config', '--get', 'remote.origin.url',
                          cwd=local_dir, raise_on_failure=False)
  return bool(git_remote) or bool(GetSVNRepoInfo(local_dir))
def UpdatePDB(pdb_filename, verbose=True, build_dir=None, toolchain_dir=None):
  """Update a pdb file with source information.

  Args:
    pdb_filename: The path of the PDB to index.
    verbose: If true, the progress is printed to stdout.
    build_dir: (optional) The build directory. The files located in it or in
        one of its subdirectories are skipped.
    toolchain_dir: (optional) The toolchain directory. The files located in it
        or in one of its subdirectories are skipped.

  Raises:
    Exception: if the PDB already contains source indexing information, or if
        one of the tools used to read or write the PDB fails.
  """
  # The directories whose files shouldn't be indexed, lower-cased like the
  # paths returned by ExtractSourceFiles.
  #   - Blacklisting the build directory allows skipping the generated files,
  #     for Chromium this makes the indexing ~10x faster.
  #   - The directories from the toolchain are blacklisted as we don't have
  #     revision info for them.
  dir_blacklist = set()
  for excluded_root in (build_dir, toolchain_dir):
    if not excluded_root:
      continue
    excluded_root = os.path.normpath(excluded_root).lower()
    dir_blacklist.add(excluded_root)
    for directory, _, _ in os.walk(excluded_root):
      dir_blacklist.add(directory.lower())

  # Writes the header of the source index stream.
  #
  # Here's the description of the variables used in the SRC_* macros (those
  # variables have to be defined for every source file that we want to index):
  #   var1: The file path.
  #   var2: The name of the file without its path.
  #   var3: The revision or the hash of this file's repository.
  #   var4: The URL to this file.
  #   var5: (optional) The python method to call to decode this file, e.g. for
  #       a base64 encoded file this value should be 'base64.b64decode'.
  lines = [
    'SRCSRV: ini ------------------------------------------------',
    'VERSION=1',
    'INDEXVERSION=2',
    'VERCTRL=Subversion',
    'DATETIME=%s' % time.asctime(),
    'SRCSRV: variables ------------------------------------------',
    'SRC_EXTRACT_TARGET_DIR=%targ%\\%fnbksl%(%var2%)\\%var3%',
    'SRC_EXTRACT_TARGET=%SRC_EXTRACT_TARGET_DIR%\\%fnfile%(%var1%)',
    'SRC_EXTRACT_CMD=cmd /c "mkdir "%SRC_EXTRACT_TARGET_DIR%" & python -c '
        '"import urllib2, base64;'
        'url = \\\"%var4%\\\";'
        'u = urllib2.urlopen(url);'
        'print %var5%(u.read());" > "%SRC_EXTRACT_TARGET%""',
    'SRCSRVTRG=%SRC_EXTRACT_TARGET%',
    'SRCSRVCMD=%SRC_EXTRACT_CMD%',
    'SRCSRV: source files ---------------------------------------',
  ]

  if ReadSourceStream(pdb_filename):
    raise Exception("PDB already has source indexing information!")

  filelist = ExtractSourceFiles(pdb_filename)
  number_of_files = len(filelist)
  indexed_files_total = 0
  while filelist:
    filename = next(iter(filelist))
    filedir = os.path.dirname(filename)
    if verbose:
      print("[%d / %d] Processing: %s" % (number_of_files - len(filelist),
          number_of_files, filename))

    # This directory is blacklisted, either because it's not part of a
    # repository, or from one we're not interested in indexing.
    if filedir in dir_blacklist:
      if verbose:
        print("  skipping, directory is blacklisted.")
      filelist.remove(filename)
      continue

    # Skip the files that don't exist on the current machine.
    if not os.path.exists(filename):
      filelist.remove(filename)
      continue

    # Try to index the current file and all the ones coming from the same
    # repository.
    indexed_files = IndexFilesFromRepo(filename, filelist, lines)
    if not indexed_files:
      if not DirectoryIsUnderPublicVersionControl(filedir):
        dir_blacklist.add(filedir)
        if verbose:
          print("Adding %s to the blacklist." % filedir)
      # IndexFilesFromRepo may already have removed this file (it also removes
      # the files of the repositories that aren't whitelisted), so don't fail
      # if it's gone.
      filelist.discard(filename)
      continue

    indexed_files_total += indexed_files

    if verbose:
      print("  %d files have been indexed." % indexed_files)

  lines.append('SRCSRV: end ------------------------------------------------')

  WriteSourceStream(pdb_filename, '\r\n'.join(lines))

  if verbose:
    print("%d / %d files have been indexed." % (indexed_files_total,
                                                number_of_files))
def main():
  """Parses the command line and source indexes every PDB passed to it.

  Returns 0 on success. A missing PDB argument is reported via parser.error().
  """
  parser = optparse.OptionParser()
  parser.add_option('-v', '--verbose', action='store_true', default=False)
  parser.add_option('--build-dir', help='The original build directory, if set '
      'all the files present in this directory (or one of its subdirectories) '
      'will be skipped.')
  parser.add_option('--toolchain-dir', help='The directory containing the '
      'toolchain that has been used for this build. If set all the files '
      'present in this directory (or one of its subdirectories) will be '
      'skipped.')
  options, args = parser.parse_args()

  if not args:
    parser.error('Specify a pdb')

  for pdb in args:
    # --toolchain-dir has to be forwarded too, otherwise the option is
    # accepted but has no effect.
    UpdatePDB(pdb, options.verbose, options.build_dir, options.toolchain_dir)

  return 0


if __name__ == '__main__':
  sys.exit(main())