2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Usage: <win-path-to-pdb.pdb>
7 This tool will take a PDB on the command line, extract the source files that
8 were used in building the PDB, query the source server for which repository
9 and revision these files are at, and then finally write this information back
10 into the PDB in a format that the debugging tools understand. This allows for
11 automatic source debugging, as all of the information is contained in the PDB,
12 and the debugger can go out and fetch the source files.
14 You most likely want to run these immediately after a build, since the source
15 input files need to match the generated PDB, and we want the correct
16 revision information for the exact files that were used for the build.
18 The following files from a windbg + source server installation are expected
19 to reside in the same directory as this python script:
24 NOTE: Expected to run under a native win32 python, NOT cygwin. All paths are
25 dealt with as win32 paths, since we have to interact with the Microsoft tools.
36 from collections
import namedtuple
38 # This serves two purposes. First, it acts as a whitelist, and only files
39 # from repositories listed here will be source indexed. Second, it allows us
40 # to map from one URL to another, so we can map to external source servers. It
41 # also indicates if the source for this project will be retrieved in a base64
43 # TODO(sebmarchand): Initialize this variable in the main function and pass it
44 # to the sub functions instead of having a global variable.
46 'http://src.chromium.org/svn': {
47 'url': 'https://src.chromium.org/chrome/'
48 '{file_path}?revision={revision}',
51 'https://src.chromium.org/svn': {
52 'url': 'https://src.chromium.org/chrome/'
53 '{file_path}?revision={revision}',
60 # Googlecode SVN projects
65 'google-cache-invalidation-api',
80 'public_url': 'https://%s.googlecode.com/svn-history/' \
81 'r{revision}/{file_path}',
83 'svn://svn-mirror.golo.chromium.org/%s',
84 'http://src.chromium.org/%s',
85 'https://src.chromium.org/%s',
86 'http://%s.googlecode.com/svn',
87 'https://%s.googlecode.com/svn',
90 # Googlecode Git projects
95 'public_url': 'https://%s.googlecode.com/git-history/' \
96 '{revision}/{file_path}',
98 'https://code.google.com/p/%s/',
109 'public_url': 'https://src.chromium.org/%s/' \
110 '{file_path}?revision={revision}',
112 'svn://chrome-svn/%s',
113 'svn://chrome-svn.corp.google.com/%s',
114 'svn://svn-mirror.golo.chromium.org/%s',
115 'svn://svn.chromium.org/%s',
120 # A named tuple used to store the information about a repository.
122 # It contains the following members:
123 # - repo: The URL of the repository;
124 # - rev: The revision (or hash) of the current checkout.
125 # - files: The list of files coming from this repository.
126 # - root_path: The root path of this checkout.
127 # - path_prefix: A prefix to apply to the filename of the files coming from
# Immutable record describing one checkout; the field names are given as a
# single whitespace-separated string, which namedtuple splits into fields.
RevisionInfo = namedtuple(
    'RevisionInfo',
    'repo rev files root_path path_prefix')
def GetCasedFilePath(filename):
    """Return the correctly cased path for a given filename.

    The name is converted to a unicode string, translated to its short path
    form with win32api.GetShortPathName and then expanded again with
    win32api.GetLongPathName; the result of that round trip is returned.
    """
    unicode_name = unicode(filename)
    short_form = win32api.GetShortPathName(unicode_name)
    long_form = win32api.GetLongPathName(short_form)
    return long_form
138 def FillRepositoriesMap():
139 """ Fill the repositories map with the whitelisted projects. """
140 for project_group
in PROJECT_GROUPS
:
141 for project
in project_group
['projects']:
142 for svn_url
in project_group
['svn_urls']:
143 REPO_MAP
[svn_url
% project
] = {
144 'url': project_group
['public_url'] % project
,
147 REPO_MAP
[project_group
['public_url'] % project
] = None
149 FillRepositoriesMap()
def FindFile(filename):
    """Return the absolute path of |filename| next to this script.

    The directory is derived from this module's __file__ (joined onto the
    current-directory marker first), |filename| is appended to it and the
    result is made absolute.
    """
    script_location = os.path.join(os.path.curdir, __file__)
    script_dir = os.path.dirname(script_location)
    sibling_path = os.path.join(script_dir, filename)
    return os.path.abspath(sibling_path)
158 def RunCommand(*cmd
, **kwargs
):
161 Returns what have been printed to stdout by this command.
164 raise_on_failure: Indicates if an exception should be raised on failure, if
165 set to false then the function will return None.
167 kwargs
.setdefault('stdin', subprocess
.PIPE
)
168 kwargs
.setdefault('stdout', subprocess
.PIPE
)
169 kwargs
.setdefault('stderr', subprocess
.PIPE
)
170 kwargs
.setdefault('universal_newlines', True)
171 raise_on_failure
= kwargs
.pop('raise_on_failure', True)
173 proc
= subprocess
.Popen(cmd
, **kwargs
)
174 ret
, err
= proc
.communicate()
175 if proc
.returncode
!= 0:
177 print 'Error: %s' % err
178 raise subprocess
.CalledProcessError(proc
.returncode
, cmd
)
181 ret
= (ret
or '').rstrip('\n')
def ExtractSourceFiles(pdb_filename):
    """Return the set of lower-cased source file paths listed for a PDB.

    Runs 'srctool.exe -r <pdb_filename>' (the executable is located with
    FindFile) and treats every non-empty output line as one source path.

    Args:
      pdb_filename: The PDB file handed to srctool.

    Returns:
      A set containing each non-empty output line, lower-cased.

    Raises:
      Exception: If the tool produced no output or its output starts with
          "srctool: ".
    """
    srctool_output = RunCommand(FindFile('srctool.exe'), '-r', pdb_filename)
    if srctool_output and not srctool_output.startswith("srctool: "):
        source_paths = set()
        for line in srctool_output.split('\n'):
            if line:
                source_paths.add(line.lower())
        return source_paths
    raise Exception("srctool failed: " + srctool_output)
193 def ReadSourceStream(pdb_filename
):
194 """Read the contents of the source information stream from a PDB."""
195 srctool
= subprocess
.Popen([FindFile('pdbstr.exe'),
197 '-p:%s' % pdb_filename
],
198 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
199 data
, _
= srctool
.communicate()
201 if ((srctool
.returncode
!= 0 and srctool
.returncode
!= -1) or
202 data
.startswith("pdbstr: ")):
203 raise Exception("pdbstr failed: " + data
)
207 def WriteSourceStream(pdb_filename
, data
):
208 """Write the contents of the source information stream to a PDB."""
209 # Write out the data to a temporary filename that we can pass to pdbstr.
210 (f
, fname
) = tempfile
.mkstemp()
211 f
= os
.fdopen(f
, "wb")
215 srctool
= subprocess
.Popen([FindFile('pdbstr.exe'),
218 '-p:%s' % pdb_filename
],
219 stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
220 data
, _
= srctool
.communicate()
222 if ((srctool
.returncode
!= 0 and srctool
.returncode
!= -1) or
223 data
.startswith("pdbstr: ")):
224 raise Exception("pdbstr failed: " + data
)
229 def GetSVNRepoInfo(local_path
):
230 """Calls svn info to extract the SVN information about a path."""
231 # We call svn.bat to make sure and get the depot tools SVN and not cygwin.
232 info
= RunCommand('svn.bat', 'info', local_path
, raise_on_failure
=False)
235 # Hack up into a dictionary of the fields printed by svn info.
236 vals
= dict((y
.split(': ', 2) for y
in info
.split('\n') if y
))
240 def ExtractSVNInfo(local_filename
):
241 """Checks if a file is coming from a svn repository and if so returns some
242 information about it.
245 local_filename: The name of the file that we want to check.
248 None if the file doesn't come from a svn repository, otherwise it returns a
251 # Try to get the svn information about this file.
252 vals
= GetSVNRepoInfo(local_filename
)
256 repo
= vals
['Repository Root']
257 if not vals
['URL'].startswith(repo
):
258 raise Exception("URL is not inside of the repository root?!?")
259 rev
= vals
['Revision']
261 svn_local_root
= os
.path
.split(local_filename
)[0]
263 # We need to look at the SVN URL of the current path to handle the case when
264 # we do a partial SVN checkout inside another checkout of the same repository.
265 # This happens in Chromium where we do some checkout of
266 # '/trunk/deps/third_party' in 'src/third_party'.
267 svn_root_url
= os
.path
.dirname(vals
['URL'])
269 # Don't try to list all the files from this repository as this seems to slow
270 # down the indexing, instead index one file at a time.
271 file_list
= [local_filename
.replace(svn_local_root
, '').lstrip(os
.path
.sep
)]
273 return RevisionInfo(repo
=repo
, rev
=rev
, files
=file_list
,
274 root_path
=svn_local_root
, path_prefix
=svn_root_url
.replace(repo
, ''))
277 def ExtractGitInfo(local_filename
):
278 """Checks if a file is coming from a git repository and if so returns some
279 information about it.
282 local_filename: The name of the file that we want to check.
285 None if the file doesn't come from a git repository, otherwise it returns a
288 # Starts by checking if this file is coming from a git repository. For that
289 # we'll start by calling 'git log' on this file; for this to work we need to
290 # make sure that the current working directory is correctly cased. It turns
291 # out that even on Windows the casing of the path passed in the |cwd| argument
292 # of subprocess.Popen matters and if it's not correctly cased then 'git log'
293 # will return None even if the file is coming from a git repository. This
294 # is not the case if we're just interested in checking if the path containing
295 # |local_filename| is coming from a git repository, in this case the casing
297 local_filename
= GetCasedFilePath(local_filename
)
298 local_file_basename
= os
.path
.basename(local_filename
)
299 local_file_dir
= os
.path
.dirname(local_filename
)
300 file_info
= RunCommand('git.bat', 'log', '-n', '1', local_file_basename
,
301 cwd
=local_file_dir
, raise_on_failure
=False)
306 # Get the revision of the master branch.
307 rev
= RunCommand('git.bat', 'rev-parse', 'HEAD', cwd
=local_file_dir
)
309 # Get the url of the remote repository.
310 repo
= RunCommand('git.bat', 'config', '--get', 'remote.origin.url',
312 # If the repository points to a local directory then we need to run this
313 # command one more time from this directory to get the repository url.
314 if os
.path
.isdir(repo
):
315 repo
= RunCommand('git.bat', 'config', '--get', 'remote.origin.url',
318 # Don't use the authenticated path.
319 repo
= repo
.replace('googlesource.com/a/', 'googlesource.com/')
321 # Get the relative file path for this file in the git repository.
322 git_path
= RunCommand('git.bat', 'ls-tree', '--full-name', '--name-only',
323 'HEAD', local_file_basename
, cwd
=local_file_dir
).replace('/','\\')
328 git_root_path
= local_filename
.replace(git_path
, '')
330 if repo
not in REPO_MAP
:
331 # Automatically adds the project coming from a git GoogleCode repository to
332 # the repository map. The files from these repositories are accessible via
333 # gitiles in a base64 encoded format.
334 if 'chromium.googlesource.com' in repo
:
336 'url': '%s/+/{revision}/{file_path}?format=TEXT' % repo
,
340 # Get the list of files coming from this repository.
341 git_file_list
= RunCommand('git.bat', 'ls-tree', '--full-name', '--name-only',
342 'HEAD', '-r', cwd
=git_root_path
)
344 file_list
= [x
for x
in git_file_list
.splitlines() if len(x
) != 0]
346 return RevisionInfo(repo
=repo
, rev
=rev
, files
=file_list
,
347 root_path
=git_root_path
, path_prefix
=None)
350 def IndexFilesFromRepo(local_filename
, file_list
, output_lines
):
351 """Checks if a given file is a part of a revision control repository (svn or
352 git) and index all the files from this repository if it's the case.
355 local_filename: The filename of the current file.
356 file_list: The list of files that should be indexed.
357 output_lines: The source indexing lines that will be appended to the PDB.
359 Returns the number of indexed files.
363 # Try to extract the revision info for the current file.
364 info
= ExtractGitInfo(local_filename
)
366 info
= ExtractSVNInfo(local_filename
)
371 root_path
= info
.root_path
.lower()
373 # Checks if we should index this file and if the source that we'll retrieve
374 # will be base64 encoded.
379 base_64
= REPO_MAP
[repo
].get('base64')
383 # Iterates over the files from this repo and index them if needed.
384 for file_iter
in files
:
385 current_filename
= file_iter
.lower()
386 full_file_path
= os
.path
.normpath(os
.path
.join(root_path
, current_filename
))
387 # Checks if the file is in the list of files to be indexed.
388 if full_file_path
in file_list
:
391 current_file
= file_iter
392 # Prefix the filename with the prefix for this repository if needed.
394 current_file
= os
.path
.join(info
.path_prefix
, current_file
)
395 source_url
= REPO_MAP
[repo
].get('url').format(revision
=rev
,
396 file_path
=os
.path
.normpath(current_file
).replace('\\', '/'))
397 output_lines
.append('%s*%s*%s*%s*%s' % (full_file_path
, current_file
,
398 rev
, source_url
, 'base64.b64decode' if base_64
else ''))
400 file_list
.remove(full_file_path
)
402 # The input file should have been removed from the list of files to index.
403 if indexed_files
and local_filename
in file_list
:
404 print '%s shouldn\'t be in the list of files to index anymore.' % \
406 # TODO(sebmarchand): Turn this into an exception once I've confirmed that
407 # this doesn't happen on the official builder.
408 file_list
.remove(local_filename
)
413 def DirectoryIsUnderPublicVersionControl(local_dir
):
414 # Checks if this directory is from a Git checkout.
415 info
= RunCommand('git.bat', 'config', '--get', 'remote.origin.url',
416 cwd
=local_dir
, raise_on_failure
=False)
420 # If not checks if it's from a SVN checkout.
421 info
= GetSVNRepoInfo(local_dir
)
428 def UpdatePDB(pdb_filename
, verbose
=True, build_dir
=None, toolchain_dir
=None):
429 """Update a pdb file with source information."""
433 # Blacklisting the build directory allows skipping the generated files, for
434 # Chromium this makes the indexing ~10x faster.
435 build_dir
= (os
.path
.normpath(build_dir
)).lower()
436 for directory
, _
, _
in os
.walk(build_dir
):
437 dir_blacklist
[directory
.lower()] = True
438 dir_blacklist
[build_dir
.lower()] = True
441 # Blacklisting the directories from the toolchain as we don't have revision
443 toolchain_dir
= (os
.path
.normpath(toolchain_dir
)).lower()
444 for directory
, _
, _
in os
.walk(build_dir
):
445 dir_blacklist
[directory
.lower()] = True
446 dir_blacklist
[toolchain_dir
.lower()] = True
448 # Writes the header of the source index stream.
450 # Here's the description of the variables used in the SRC_* macros (those
451 # variables have to be defined for every source file that we want to index):
452 # var1: The file path.
453 # var2: The name of the file without its path.
454 # var3: The revision or the hash of this file's repository.
455 # var4: The URL to this file.
456 # var5: (optional) The python method to call to decode this file, e.g. for
457 # a base64 encoded file this value should be 'base64.b64decode'.
459 'SRCSRV: ini ------------------------------------------------',
462 'VERCTRL=Subversion',
463 'DATETIME=%s' % time
.asctime(),
464 'SRCSRV: variables ------------------------------------------',
465 'SRC_EXTRACT_TARGET_DIR=%targ%\%fnbksl%(%var2%)\%var3%',
466 'SRC_EXTRACT_TARGET=%SRC_EXTRACT_TARGET_DIR%\%fnfile%(%var1%)',
467 'SRC_EXTRACT_CMD=cmd /c "mkdir "%SRC_EXTRACT_TARGET_DIR%" & python -c '
468 '"import urllib2, base64;'
469 'url = \\\"%var4%\\\";'
470 'u = urllib2.urlopen(url);'
471 'print %var5%(u.read());" > "%SRC_EXTRACT_TARGET%""',
472 'SRCSRVTRG=%SRC_EXTRACT_TARGET%',
473 'SRCSRVCMD=%SRC_EXTRACT_CMD%',
474 'SRCSRV: source files ---------------------------------------',
477 if ReadSourceStream(pdb_filename
):
478 raise Exception("PDB already has source indexing information!")
480 filelist
= ExtractSourceFiles(pdb_filename
)
481 number_of_files
= len(filelist
)
482 indexed_files_total
= 0
484 filename
= next(iter(filelist
))
485 filedir
= os
.path
.dirname(filename
)
487 print "[%d / %d] Processing: %s" % (number_of_files
- len(filelist
),
488 number_of_files
, filename
)
490 # This directory is blacklisted, either because it's not part of a
491 # repository, or from one we're not interested in indexing.
492 if dir_blacklist
.get(filedir
, False):
494 print " skipping, directory is blacklisted."
495 filelist
.remove(filename
)
498 # Skip the files that don't exist on the current machine.
499 if not os
.path
.exists(filename
):
500 filelist
.remove(filename
)
503 # Try to index the current file and all the ones coming from the same
505 indexed_files
= IndexFilesFromRepo(filename
, filelist
, lines
)
506 if not indexed_files
:
507 if not DirectoryIsUnderPublicVersionControl(filedir
):
508 dir_blacklist
[filedir
] = True
510 print "Adding %s to the blacklist." % filedir
511 filelist
.remove(filename
)
514 indexed_files_total
+= indexed_files
517 print " %d files have been indexed." % indexed_files
519 lines
.append('SRCSRV: end ------------------------------------------------')
521 WriteSourceStream(pdb_filename
, '\r\n'.join(lines
))
524 print "%d / %d files have been indexed." % (indexed_files_total
,
529 parser
= optparse
.OptionParser()
530 parser
.add_option('-v', '--verbose', action
='store_true', default
=False)
531 parser
.add_option('--build-dir', help='The original build directory, if set '
532 'all the files present in this directory (or one of its subdirectories) '
534 parser
.add_option('--toolchain-dir', help='The directory containing the '
535 'toolchain that has been used for this build. If set all the files '
536 'present in this directory (or one of its subdirectories) will be '
538 options
, args
= parser
.parse_args()
541 parser
.error('Specify a pdb')
544 UpdatePDB(pdb
, options
.verbose
, options
.build_dir
)
549 if __name__
== '__main__':