#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17 """Tool for uploading diffs from a version control system to the codereview app.
19 Usage summary: upload.py [options] [-- diff_options]
21 Diff options are passed to the diff command of the underlying system.
23 Supported version control systems:
24 Git
25 Mercurial
26 Subversion
28 It is important for Git/Mercurial users to specify a tree/node/branch to diff
29 against by using the '--rev' option.
30 """
31 # This code is derived from appcfg.py in the App Engine SDK (open source),
32 # and from ASPN recipe #146306.
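#
# Example invocation (illustrative only; the reviewer address, revision and
# message below are placeholders, and the available flags are those defined by
# the option parser later in this file):
#
#   upload.py --rev=origin/master -m "Short patch description" \
#             -r reviewer@example.com --send_mail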

import cookielib
import getpass
import logging
import md5
import mimetypes
import optparse
import os
import re
import socket
import subprocess
import sys
import urllib
import urllib2
import urlparse

try:
  import readline
except ImportError:
  pass

# The logging verbosity:
#  0: Errors only.
#  1: Status messages.
#  2: Info logs.
#  3: Debug logs.
verbosity = 1

# Max size of patch or base file.
MAX_UPLOAD_SIZE = 900 * 1024


def StatusUpdate(msg):
  """Print a status message to stdout.

  If 'verbosity' is greater than 0, print the message.

  Args:
    msg: The string to print.
  """
  if verbosity > 0:
    print msg


def ErrorExit(msg):
  """Print an error message to stderr and exit."""
  print >>sys.stderr, msg
  sys.exit(1)


class ClientLoginError(urllib2.HTTPError):
  """Raised to indicate there was an error authenticating with ClientLogin."""

  def __init__(self, url, code, msg, headers, args):
    urllib2.HTTPError.__init__(self, url, code, msg, headers, None)
    self.args = args
    self.reason = args["Error"]


class AbstractRpcServer(object):
  """Provides a common interface for a simple RPC server."""

  def __init__(self, host, auth_function, host_override=None, extra_headers={},
               save_cookies=False):
    """Creates a new HttpRpcServer.

    Args:
      host: The host to send requests to.
      auth_function: A function that takes no arguments and returns an
        (email, password) tuple when called. Will be called if authentication
        is required.
      host_override: The host header to send to the server (defaults to host).
      extra_headers: A dict of extra headers to append to every request.
      save_cookies: If True, save the authentication cookies to local disk.
        If False, use an in-memory cookiejar instead. Subclasses must
        implement this functionality. Defaults to False.
    """
    self.host = host
    self.host_override = host_override
    self.auth_function = auth_function
    self.authenticated = False
    self.extra_headers = extra_headers
    self.save_cookies = save_cookies
    self.opener = self._GetOpener()
    if self.host_override:
      logging.info("Server: %s; Host: %s", self.host, self.host_override)
    else:
      logging.info("Server: %s", self.host)

  def _GetOpener(self):
    """Returns an OpenerDirector for making HTTP requests.

    Returns:
      A urllib2.OpenerDirector object.
    """
    raise NotImplementedError()

  def _CreateRequest(self, url, data=None):
    """Creates a new urllib request."""
    logging.debug("Creating request for: '%s' with payload:\n%s", url, data)
    req = urllib2.Request(url, data=data)
    if self.host_override:
      req.add_header("Host", self.host_override)
    for key, value in self.extra_headers.iteritems():
      req.add_header(key, value)
    return req

  def _GetAuthToken(self, email, password):
    """Uses ClientLogin to authenticate the user, returning an auth token.

    Args:
      email: The user's email address
      password: The user's password

    Raises:
      ClientLoginError: If there was an error authenticating with ClientLogin.
      HTTPError: If there was some other form of HTTP error.

    Returns:
      The authentication token returned by ClientLogin.
    """
    account_type = "HOSTED_OR_GOOGLE"
    if self.host.endswith(".google.com"):
      # Needed for use inside Google.
      account_type = "HOSTED"
    req = self._CreateRequest(
        url="https://www.google.com/accounts/ClientLogin",
        data=urllib.urlencode({
            "Email": email,
            "Passwd": password,
            "service": "ah",
            "source": "rietveld-codereview-upload",
            "accountType": account_type,
        }),
    )
    try:
      response = self.opener.open(req)
      response_body = response.read()
      response_dict = dict(x.split("=")
                           for x in response_body.split("\n") if x)
      return response_dict["Auth"]
    except urllib2.HTTPError, e:
      if e.code == 403:
        body = e.read()
        response_dict = dict(x.split("=", 1) for x in body.split("\n") if x)
        raise ClientLoginError(req.get_full_url(), e.code, e.msg,
                               e.headers, response_dict)
      else:
        raise

  def _GetAuthCookie(self, auth_token):
    """Fetches authentication cookies for an authentication token.

    Args:
      auth_token: The authentication token returned by ClientLogin.

    Raises:
      HTTPError: If there was an error fetching the authentication cookies.
    """
    # This is a dummy value to allow us to identify when we're successful.
    continue_location = "http://localhost/"
    args = {"continue": continue_location, "auth": auth_token}
    req = self._CreateRequest("http://%s/_ah/login?%s" %
                              (self.host, urllib.urlencode(args)))
    try:
      response = self.opener.open(req)
    except urllib2.HTTPError, e:
      response = e
    if (response.code != 302 or
        response.info()["location"] != continue_location):
      raise urllib2.HTTPError(req.get_full_url(), response.code, response.msg,
                              response.headers, response.fp)
    self.authenticated = True

  def _Authenticate(self):
    """Authenticates the user.

    The authentication process works as follows:
     1) We get a username and password from the user
     2) We use ClientLogin to obtain an AUTH token for the user
        (see http://code.google.com/apis/accounts/AuthForInstalledApps.html).
     3) We pass the auth token to /_ah/login on the server to obtain an
        authentication cookie. If login was successful, it tries to redirect
        us to the URL we provided.

    If we attempt to access the upload API without first obtaining an
    authentication cookie, it returns a 401 response and directs us to
    authenticate ourselves with ClientLogin.
    """
    for i in range(3):
      credentials = self.auth_function()
      try:
        auth_token = self._GetAuthToken(credentials[0], credentials[1])
      except ClientLoginError, e:
        if e.reason == "BadAuthentication":
          print >>sys.stderr, "Invalid username or password."
          continue
        if e.reason == "CaptchaRequired":
          print >>sys.stderr, (
              "Please go to\n"
              "https://www.google.com/accounts/DisplayUnlockCaptcha\n"
              "and verify you are a human. Then try again.")
          break
        if e.reason == "NotVerified":
          print >>sys.stderr, "Account not verified."
          break
        if e.reason == "TermsNotAgreed":
          print >>sys.stderr, "User has not agreed to TOS."
          break
        if e.reason == "AccountDeleted":
          print >>sys.stderr, "The user account has been deleted."
          break
        if e.reason == "AccountDisabled":
          print >>sys.stderr, "The user account has been disabled."
          break
        if e.reason == "ServiceDisabled":
          print >>sys.stderr, ("The user's access to the service has been "
                               "disabled.")
          break
        if e.reason == "ServiceUnavailable":
          print >>sys.stderr, "The service is not available; try again later."
          break
        raise
      self._GetAuthCookie(auth_token)
      return

  def Send(self, request_path, payload=None,
           content_type="application/octet-stream",
           timeout=None,
           **kwargs):
    """Sends an RPC and returns the response.

    Args:
      request_path: The path to send the request to, eg /api/appversion/create.
      payload: The body of the request, or None to send an empty request.
      content_type: The Content-Type header to use.
      timeout: timeout in seconds; default None i.e. no timeout.
        (Note: for large requests on OS X, the timeout doesn't work right.)
      kwargs: Any keyword arguments are converted into query string parameters.

    Returns:
      The response body, as a string.
    """
    # TODO: Don't require authentication. Let the server say
    # whether it is necessary.
    if not self.authenticated:
      self._Authenticate()

    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
      tries = 0
      while True:
        tries += 1
        args = dict(kwargs)
        url = "http://%s%s" % (self.host, request_path)
        if args:
          url += "?" + urllib.urlencode(args)
        req = self._CreateRequest(url=url, data=payload)
        req.add_header("Content-Type", content_type)
        try:
          f = self.opener.open(req)
          response = f.read()
          f.close()
          return response
        except urllib2.HTTPError, e:
          if tries > 3:
            raise
          elif e.code == 401:
            self._Authenticate()
##          elif e.code >= 500 and e.code < 600:
##            # Server Error - try again.
##            continue
          else:
            raise
    finally:
      socket.setdefaulttimeout(old_timeout)


class HttpRpcServer(AbstractRpcServer):
  """Provides a simplified RPC-style interface for HTTP requests."""

  def _Authenticate(self):
    """Save the cookie jar after authentication."""
    super(HttpRpcServer, self)._Authenticate()
    if self.save_cookies:
      StatusUpdate("Saving authentication cookies to %s" % self.cookie_file)
      self.cookie_jar.save()

  def _GetOpener(self):
    """Returns an OpenerDirector that supports cookies and ignores redirects.

    Returns:
      A urllib2.OpenerDirector object.
    """
    opener = urllib2.OpenerDirector()
    opener.add_handler(urllib2.ProxyHandler())
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
    opener.add_handler(urllib2.HTTPSHandler())
    opener.add_handler(urllib2.HTTPErrorProcessor())
    if self.save_cookies:
      self.cookie_file = os.path.expanduser("~/.codereview_upload_cookies")
      self.cookie_jar = cookielib.MozillaCookieJar(self.cookie_file)
      if os.path.exists(self.cookie_file):
        try:
          self.cookie_jar.load()
          self.authenticated = True
          StatusUpdate("Loaded authentication cookies from %s" %
                       self.cookie_file)
        except (cookielib.LoadError, IOError):
          # Failed to load cookies - just ignore them.
          pass
      else:
        # Create an empty cookie file with mode 600
        fd = os.open(self.cookie_file, os.O_CREAT, 0600)
        os.close(fd)
      # Always chmod the cookie file
      os.chmod(self.cookie_file, 0600)
    else:
      # Don't save cookies across runs of upload.py.
      self.cookie_jar = cookielib.CookieJar()
    opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar))
    return opener


parser = optparse.OptionParser(usage="%prog [options] [-- diff_options]")
parser.add_option("-y", "--assume_yes", action="store_true",
                  dest="assume_yes", default=False,
                  help="Assume that the answer to yes/no questions is 'yes'.")
# Logging
group = parser.add_option_group("Logging options")
group.add_option("-q", "--quiet", action="store_const", const=0,
                 dest="verbose", help="Print errors only.")
group.add_option("-v", "--verbose", action="store_const", const=2,
                 dest="verbose", default=1,
                 help="Print info level logs (default).")
group.add_option("--noisy", action="store_const", const=3,
                 dest="verbose", help="Print all logs.")
# Review server
group = parser.add_option_group("Review server options")
group.add_option("-s", "--server", action="store", dest="server",
                 default="codereviews.googleopensourceprograms.com",
                 metavar="SERVER",
                 help=("The server to upload to. The format is host[:port]. "
                       "Defaults to '%default'."))
group.add_option("-e", "--email", action="store", dest="email",
                 metavar="EMAIL", default=None,
                 help="The username to use. Will prompt if omitted.")
group.add_option("-H", "--host", action="store", dest="host",
                 metavar="HOST", default=None,
                 help="Overrides the Host header sent with all RPCs.")
group.add_option("--no_cookies", action="store_false",
                 dest="save_cookies", default=True,
                 help="Do not save authentication cookies to local disk.")
# Issue
group = parser.add_option_group("Issue options")
group.add_option("-d", "--description", action="store", dest="description",
                 metavar="DESCRIPTION", default=None,
                 help="Optional description when creating an issue.")
group.add_option("-f", "--description_file", action="store",
                 dest="description_file", metavar="DESCRIPTION_FILE",
                 default=None,
                 help="Optional path of a file that contains "
                      "the description when creating an issue.")
group.add_option("-r", "--reviewers", action="store", dest="reviewers",
                 metavar="REVIEWERS", default=None,
                 help="Add reviewers (comma separated email addresses).")
group.add_option("--cc", action="store", dest="cc",
                 metavar="CC", default=None,
                 help="Add CC (comma separated email addresses).")
# Upload options
group = parser.add_option_group("Patch options")
group.add_option("-m", "--message", action="store", dest="message",
                 metavar="MESSAGE", default=None,
                 help="A message to identify the patch. "
                      "Will prompt if omitted.")
group.add_option("-i", "--issue", type="int", action="store",
                 metavar="ISSUE", default=None,
                 help="Issue number to which to add. Defaults to new issue.")
group.add_option("--download_base", action="store_true",
                 dest="download_base", default=False,
                 help="Base files will be downloaded by the server "
                      "(side-by-side diffs may not work on files with CRs).")
group.add_option("--rev", action="store", dest="revision",
                 metavar="REV", default=None,
                 help="Branch/tree/revision to diff against (used by DVCS).")
group.add_option("--send_mail", action="store_true",
                 dest="send_mail", default=False,
                 help="Send notification email to reviewers.")


def GetRpcServer(options):
  """Returns an instance of an AbstractRpcServer.

  Returns:
    A new AbstractRpcServer, on which RPC calls can be made.
  """

  rpc_server_class = HttpRpcServer

  def GetUserCredentials():
    """Prompts the user for a username and password."""
    email = options.email
    if email is None:
      prompt = "Email (login for uploading to %s): " % options.server
      email = raw_input(prompt).strip()
    password = getpass.getpass("Password for %s: " % email)
    return (email, password)

  # If this is the dev_appserver, use fake authentication.
  host = (options.host or options.server).lower()
  if host == "localhost" or host.startswith("localhost:"):
    email = options.email
    if email is None:
      email = "test@example.com"
      logging.info("Using debug user %s. Override with --email" % email)
    server = rpc_server_class(
        options.server,
        lambda: (email, "password"),
        host_override=options.host,
        extra_headers={"Cookie":
                       'dev_appserver_login="%s:False"' % email},
        save_cookies=options.save_cookies)
    # Don't try to talk to ClientLogin.
    server.authenticated = True
    return server

  return rpc_server_class(options.server, GetUserCredentials,
                          host_override=options.host,
                          save_cookies=options.save_cookies)


def EncodeMultipartFormData(fields, files):
  """Encode form fields for multipart/form-data.

  Args:
    fields: A sequence of (name, value) elements for regular form fields.
    files: A sequence of (name, filename, value) elements for data to be
           uploaded as files.
  Returns:
    (content_type, body) ready for httplib.HTTP instance.

  Source:
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
  """
  BOUNDARY = '-M-A-G-I-C---B-O-U-N-D-A-R-Y-'
  CRLF = '\r\n'
  lines = []
  for (key, value) in fields:
    lines.append('--' + BOUNDARY)
    lines.append('Content-Disposition: form-data; name="%s"' % key)
    lines.append('')
    lines.append(value)
  for (key, filename, value) in files:
    lines.append('--' + BOUNDARY)
    lines.append('Content-Disposition: form-data; name="%s"; filename="%s"' %
                 (key, filename))
    lines.append('Content-Type: %s' % GetContentType(filename))
    lines.append('')
    lines.append(value)
  lines.append('--' + BOUNDARY + '--')
  lines.append('')
  body = CRLF.join(lines)
  content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
  return content_type, body


def GetContentType(filename):
  """Helper to guess the content-type from the filename."""
  return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
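
# Illustrative sketch of how EncodeMultipartFormData is used elsewhere in this
# script (the field values here are placeholders, not data from a real upload):
#
#   ctype, body = EncodeMultipartFormData(
#       [("subject", "Example change")],
#       [("data", "data.diff", "Index: foo.py\n...")])
#   response_body = rpc_server.Send("/upload", body, content_type=ctype)
#
# ctype comes back as 'multipart/form-data; boundary=-M-A-G-I-C---B-O-U-N-D-A-R-Y-'
# and body is the CRLF-joined multipart payload.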


# Use a shell for subcommands on Windows to get a PATH search.
use_shell = sys.platform.startswith("win")


def RunShellWithReturnCode(command, print_output=False,
                           universal_newlines=True):
  """Executes a command and returns the output from stdout and the return code.

  Args:
    command: Command to execute.
    print_output: If True, the output is printed to stdout.
      If False, both stdout and stderr are ignored.
    universal_newlines: Use universal_newlines flag (default: True).

  Returns:
    Tuple (output, return code)
  """
  logging.info("Running %s", command)
  p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       shell=use_shell, universal_newlines=universal_newlines)
  if print_output:
    output_array = []
    while True:
      line = p.stdout.readline()
      if not line:
        break
      print line.strip("\n")
      output_array.append(line)
    output = "".join(output_array)
  else:
    output = p.stdout.read()
  p.wait()
  errout = p.stderr.read()
  if print_output and errout:
    print >>sys.stderr, errout
  p.stdout.close()
  p.stderr.close()
  return output, p.returncode


def RunShell(command, silent_ok=False, universal_newlines=True,
             print_output=False):
  data, retcode = RunShellWithReturnCode(command, print_output,
                                         universal_newlines)
  if retcode:
    ErrorExit("Got error status from %s:\n%s" % (command, data))
  if not silent_ok and not data:
    ErrorExit("No output from %s" % command)
  return data


class VersionControlSystem(object):
  """Abstract base class providing an interface to the VCS."""

  def __init__(self, options):
    """Constructor.

    Args:
      options: Command line options.
    """
    self.options = options

  def GenerateDiff(self, args):
    """Return the current diff as a string.

    Args:
      args: Extra arguments to pass to the diff command.
    """
    raise NotImplementedError(
        "abstract method -- subclass %s must override" % self.__class__)

  def GetUnknownFiles(self):
    """Return a list of files unknown to the VCS."""
    raise NotImplementedError(
        "abstract method -- subclass %s must override" % self.__class__)

  def CheckForUnknownFiles(self):
    """Show an "are you sure?" prompt if there are unknown files."""
    unknown_files = self.GetUnknownFiles()
    if unknown_files:
      print "The following files are not added to version control:"
      for line in unknown_files:
        print line
      prompt = "Are you sure to continue?(y/N) "
      answer = raw_input(prompt).strip()
      if answer != "y":
        ErrorExit("User aborted")

  def GetBaseFile(self, filename):
    """Get the content of the upstream version of a file.

    Returns:
      A tuple (base_content, new_content, is_binary, status)
        base_content: The contents of the base file.
        new_content: For text files, this is empty. For binary files, this is
          the contents of the new file, since the diff output won't contain
          information to reconstruct the current file.
        is_binary: True iff the file is binary.
        status: The status of the file.
    """

    raise NotImplementedError(
        "abstract method -- subclass %s must override" % self.__class__)


  def GetBaseFiles(self, diff):
    """Helper that calls GetBaseFile for each file in the patch.

    Returns:
      A dictionary that maps from filename to GetBaseFile's tuple. Filenames
      are retrieved based on lines that start with "Index:" or
      "Property changes on:".
    """
    files = {}
    for line in diff.splitlines(True):
      if line.startswith('Index:') or line.startswith('Property changes on:'):
        unused, filename = line.split(':', 1)
        # On Windows if a file has property changes its filename uses '\'
        # instead of '/'.
        filename = filename.strip().replace('\\', '/')
        files[filename] = self.GetBaseFile(filename)
    return files


  def UploadBaseFiles(self, issue, rpc_server, patch_list, patchset, options,
                      files):
    """Uploads the base files (and if necessary, the current ones as well)."""

    def UploadFile(filename, file_id, content, is_binary, status, is_base):
      """Uploads a file to the server."""
      file_too_large = False
      if is_base:
        type = "base"
      else:
        type = "current"
      if len(content) > MAX_UPLOAD_SIZE:
        print ("Not uploading the %s file for %s because it's too large." %
               (type, filename))
        file_too_large = True
        content = ""
      checksum = md5.new(content).hexdigest()
      if options.verbose > 0 and not file_too_large:
        print "Uploading %s file for %s" % (type, filename)
      url = "/%d/upload_content/%d/%d" % (int(issue), int(patchset), file_id)
      form_fields = [("filename", filename),
                     ("status", status),
                     ("checksum", checksum),
                     ("is_binary", str(is_binary)),
                     ("is_current", str(not is_base)),
                    ]
      if file_too_large:
        form_fields.append(("file_too_large", "1"))
      if options.email:
        form_fields.append(("user", options.email))
      ctype, body = EncodeMultipartFormData(form_fields,
                                            [("data", filename, content)])
      response_body = rpc_server.Send(url, body,
                                      content_type=ctype)
      if not response_body.startswith("OK"):
        StatusUpdate("  --> %s" % response_body)
        sys.exit(1)

    patches = dict()
    [patches.setdefault(v, k) for k, v in patch_list]
    for filename in patches.keys():
      base_content, new_content, is_binary, status = files[filename]
      file_id_str = patches.get(filename)
      if file_id_str.find("nobase") != -1:
        base_content = None
        file_id_str = file_id_str[file_id_str.rfind("_") + 1:]
      file_id = int(file_id_str)
      if base_content != None:
        UploadFile(filename, file_id, base_content, is_binary, status, True)
      if new_content != None:
        UploadFile(filename, file_id, new_content, is_binary, status, False)

  def IsImage(self, filename):
    """Returns true if the filename has an image extension."""
    mimetype = mimetypes.guess_type(filename)[0]
    if not mimetype:
      return False
    return mimetype.startswith("image/")


class SubversionVCS(VersionControlSystem):
  """Implementation of the VersionControlSystem interface for Subversion."""

  def __init__(self, options):
    super(SubversionVCS, self).__init__(options)
    if self.options.revision:
      match = re.match(r"(\d+)(:(\d+))?", self.options.revision)
      if not match:
        ErrorExit("Invalid Subversion revision %s." % self.options.revision)
      self.rev_start = match.group(1)
      self.rev_end = match.group(3)
    else:
      self.rev_start = self.rev_end = None
    # Cache output from "svn list -r REVNO dirname".
    # Keys: dirname, Values: 2-tuple (output for start rev and end rev).
    self.svnls_cache = {}
    # SVN base URL is required to fetch files deleted in an older revision.
    # Result is cached to not guess it over and over again in GetBaseFile().
    required = self.options.download_base or self.options.revision is not None
    self.svn_base = self._GuessBase(required)

  def GuessBase(self, required):
    """Wrapper for _GuessBase."""
    return self.svn_base

  def _GuessBase(self, required):
    """Returns the SVN base URL.

    Args:
      required: If true, exits if the url can't be guessed, otherwise None is
        returned.
    """
    info = RunShell(["svn", "info"])
    for line in info.splitlines():
      words = line.split()
      if len(words) == 2 and words[0] == "URL:":
        url = words[1]
        scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
        username, netloc = urllib.splituser(netloc)
        if username:
          logging.info("Removed username from base URL")
        if netloc.endswith("svn.python.org"):
          if netloc == "svn.python.org":
            if path.startswith("/projects/"):
              path = path[9:]
          elif netloc != "pythondev@svn.python.org":
            ErrorExit("Unrecognized Python URL: %s" % url)
          base = "http://svn.python.org/view/*checkout*%s/" % path
          logging.info("Guessed Python base = %s", base)
        elif netloc.endswith("svn.collab.net"):
          if path.startswith("/repos/"):
            path = path[6:]
          base = "http://svn.collab.net/viewvc/*checkout*%s/" % path
          logging.info("Guessed CollabNet base = %s", base)
        elif netloc.endswith(".googlecode.com"):
          path = path + "/"
          base = urlparse.urlunparse(("http", netloc, path, params,
                                      query, fragment))
          logging.info("Guessed Google Code base = %s", base)
        else:
          path = path + "/"
          base = urlparse.urlunparse((scheme, netloc, path, params,
                                      query, fragment))
          logging.info("Guessed base = %s", base)
        return base
    if required:
      ErrorExit("Can't find URL in output from svn info")
    return None

  def GenerateDiff(self, args):
    cmd = ["svn", "diff"]
    if self.options.revision:
      cmd += ["-r", self.options.revision]
    cmd.extend(args)
    data = RunShell(cmd)
    count = 0
    for line in data.splitlines():
      if line.startswith("Index:") or line.startswith("Property changes on:"):
        count += 1
        logging.info(line)
    if not count:
      ErrorExit("No valid patches found in output from svn diff")
    return data

  def _CollapseKeywords(self, content, keyword_str):
    """Collapses SVN keywords."""
    # svn cat translates keywords but svn diff doesn't. As a result of this
    # behavior patching.PatchChunks() fails with a chunk mismatch error.
    # This part was originally written by the Review Board development team
    # who had the same problem (http://reviews.review-board.org/r/276/).
    # Mapping of keywords to known aliases
    svn_keywords = {
      # Standard keywords
      'Date': ['Date', 'LastChangedDate'],
      'Revision': ['Revision', 'LastChangedRevision', 'Rev'],
      'Author': ['Author', 'LastChangedBy'],
      'HeadURL': ['HeadURL', 'URL'],
      'Id': ['Id'],

      # Aliases
      'LastChangedDate': ['LastChangedDate', 'Date'],
      'LastChangedRevision': ['LastChangedRevision', 'Rev', 'Revision'],
      'LastChangedBy': ['LastChangedBy', 'Author'],
      'URL': ['URL', 'HeadURL'],
    }

    def repl(m):
      if m.group(2):
        return "$%s::%s$" % (m.group(1), " " * len(m.group(3)))
      return "$%s$" % m.group(1)
    keywords = [keyword
                for name in keyword_str.split(" ")
                for keyword in svn_keywords.get(name, [])]
    return re.sub(r"\$(%s):(:?)([^\$]+)\$" % '|'.join(keywords), repl, content)

  def GetUnknownFiles(self):
    status = RunShell(["svn", "status", "--ignore-externals"], silent_ok=True)
    unknown_files = []
    for line in status.split("\n"):
      if line and line[0] == "?":
        unknown_files.append(line)
    return unknown_files

  def ReadFile(self, filename):
    """Returns the contents of a file."""
    file = open(filename, 'rb')
    result = ""
    try:
      result = file.read()
    finally:
      file.close()
    return result

  def GetStatus(self, filename):
    """Returns the status of a file."""
    if not self.options.revision:
      status = RunShell(["svn", "status", "--ignore-externals", filename])
      if not status:
        ErrorExit("svn status returned no output for %s" % filename)
      status_lines = status.splitlines()
      # If file is in a cl, the output will begin with
      # "\n--- Changelist 'cl_name':\n". See
      # http://svn.collab.net/repos/svn/trunk/notes/changelist-design.txt
      if (len(status_lines) == 3 and
          not status_lines[0] and
          status_lines[1].startswith("--- Changelist")):
        status = status_lines[2]
      else:
        status = status_lines[0]
    # If we have a revision to diff against we need to run "svn list"
    # for the old and the new revision and compare the results to get
    # the correct status for a file.
    else:
      dirname, relfilename = os.path.split(filename)
      if dirname not in self.svnls_cache:
        cmd = ["svn", "list", "-r", self.rev_start, dirname or "."]
        out, returncode = RunShellWithReturnCode(cmd)
        if returncode:
          ErrorExit("Failed to get status for %s." % filename)
        old_files = out.splitlines()
        args = ["svn", "list"]
        if self.rev_end:
          args += ["-r", self.rev_end]
        cmd = args + [dirname or "."]
        out, returncode = RunShellWithReturnCode(cmd)
        if returncode:
          ErrorExit("Failed to run command %s" % cmd)
        self.svnls_cache[dirname] = (old_files, out.splitlines())
      old_files, new_files = self.svnls_cache[dirname]
      if relfilename in old_files and relfilename not in new_files:
        status = "D   "
      elif relfilename in old_files and relfilename in new_files:
        status = "M   "
      else:
        status = "A   "
    return status

  def GetBaseFile(self, filename):
    status = self.GetStatus(filename)
    base_content = None
    new_content = None

    # If a file is copied its status will be "A  +", which signifies
    # "addition-with-history". See "svn st" for more information. We need to
    # upload the original file or else diff parsing will fail if the file was
    # edited.
    if status[0] == "A" and status[3] != "+":
      # We'll need to upload the new content if we're adding a binary file
      # since diff's output won't contain it.
      mimetype = RunShell(["svn", "propget", "svn:mime-type", filename],
                          silent_ok=True)
      base_content = ""
      is_binary = mimetype and not mimetype.startswith("text/")
      if is_binary and self.IsImage(filename):
        new_content = self.ReadFile(filename)
    elif (status[0] in ("M", "D", "R") or
          (status[0] == "A" and status[3] == "+") or  # Copied file.
          (status[0] == " " and status[1] == "M")):  # Property change.
      args = []
      if self.options.revision:
        url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
      else:
        # Don't change filename, it's needed later.
        url = filename
        args += ["-r", "BASE"]
      cmd = ["svn"] + args + ["propget", "svn:mime-type", url]
      mimetype, returncode = RunShellWithReturnCode(cmd)
      if returncode:
        # File does not exist in the requested revision.
        # Reset mimetype, it contains an error message.
        mimetype = ""
      get_base = False
      is_binary = mimetype and not mimetype.startswith("text/")
      if status[0] == " ":
        # Empty base content just to force an upload.
        base_content = ""
      elif is_binary:
        if self.IsImage(filename):
          get_base = True
          if status[0] == "M":
            if not self.rev_end:
              new_content = self.ReadFile(filename)
            else:
              url = "%s/%s@%s" % (self.svn_base, filename, self.rev_end)
              new_content = RunShell(["svn", "cat", url],
                                     universal_newlines=True)
        else:
          base_content = ""
      else:
        get_base = True

      if get_base:
        if is_binary:
          universal_newlines = False
        else:
          universal_newlines = True
        if self.rev_start:
          # "svn cat -r REV delete_file.txt" doesn't work. cat requires
          # the full URL with "@REV" appended instead of using "-r" option.
          url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
          base_content = RunShell(["svn", "cat", url],
                                  universal_newlines=universal_newlines)
        else:
          base_content = RunShell(["svn", "cat", filename],
                                  universal_newlines=universal_newlines)
        if not is_binary:
          args = []
          if self.rev_start:
            url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
          else:
            url = filename
            args += ["-r", "BASE"]
          cmd = ["svn"] + args + ["propget", "svn:keywords", url]
          keywords, returncode = RunShellWithReturnCode(cmd)
          if keywords and not returncode:
            base_content = self._CollapseKeywords(base_content, keywords)
    else:
      StatusUpdate("svn status returned unexpected output: %s" % status)
      sys.exit(1)
    return base_content, new_content, is_binary, status[0:5]


class GitVCS(VersionControlSystem):
  """Implementation of the VersionControlSystem interface for Git."""

  def __init__(self, options):
    super(GitVCS, self).__init__(options)
    # Map of filename -> hash of base file.
    self.base_hashes = {}

  def GenerateDiff(self, extra_args):
    # This is more complicated than svn's GenerateDiff because we must convert
    # the diff output to include an svn-style "Index:" line as well as record
    # the hashes of the base files, so we can upload them along with our diff.
    if self.options.revision:
      extra_args = [self.options.revision] + extra_args
    gitdiff = RunShell(["git", "diff", "--full-index"] + extra_args)
    svndiff = []
    filecount = 0
    filename = None
    for line in gitdiff.splitlines():
      match = re.match(r"diff --git a/(.*) b/.*$", line)
      if match:
        filecount += 1
        filename = match.group(1)
        svndiff.append("Index: %s\n" % filename)
      else:
        # The "index" line in a git diff looks like this (long hashes elided):
        # index 82c0d44..b2cee3f 100755
        # We want to save the left hash, as that identifies the base file.
        match = re.match(r"index (\w+)\.\.", line)
        if match:
          self.base_hashes[filename] = match.group(1)
      svndiff.append(line + "\n")
    if not filecount:
      ErrorExit("No valid patches found in output from git diff")
    return "".join(svndiff)

  def GetUnknownFiles(self):
    status = RunShell(["git", "ls-files", "--exclude-standard", "--others"],
                      silent_ok=True)
    return status.splitlines()

  def GetBaseFile(self, filename):
    hash = self.base_hashes[filename]
    base_content = None
    new_content = None
    is_binary = False
    if hash == "0" * 40:  # All-zero hash indicates no base file.
      status = "A"
      base_content = ""
    else:
      status = "M"
      base_content = RunShell(["git", "show", hash])
    return (base_content, new_content, is_binary, status)


class MercurialVCS(VersionControlSystem):
  """Implementation of the VersionControlSystem interface for Mercurial."""

  def __init__(self, options, repo_dir):
    super(MercurialVCS, self).__init__(options)
    # Absolute path to repository (we can be in a subdir)
    self.repo_dir = os.path.normpath(repo_dir)
    # Compute the subdir
    cwd = os.path.normpath(os.getcwd())
    assert cwd.startswith(self.repo_dir)
    self.subdir = cwd[len(self.repo_dir):].lstrip(r"\/")
    if self.options.revision:
      self.base_rev = self.options.revision
    else:
      self.base_rev = RunShell(["hg", "parent", "-q"]).split(':')[1].strip()

  def _GetRelPath(self, filename):
    """Get relative path of a file according to the current directory,
    given its logical path in the repo."""
    assert filename.startswith(self.subdir), filename
    return filename[len(self.subdir):].lstrip(r"\/")

  def GenerateDiff(self, extra_args):
    # If no file specified, restrict to the current subdir
    extra_args = extra_args or ["."]
    cmd = ["hg", "diff", "--git", "-r", self.base_rev] + extra_args
    data = RunShell(cmd, silent_ok=True)
    svndiff = []
    filecount = 0
    for line in data.splitlines():
      m = re.match("diff --git a/(\S+) b/(\S+)", line)
      if m:
        # Modify the line so it looks as if it came from svn diff.
        # With this modification no changes on the server side are required
        # to make upload.py work with Mercurial repos.
        # NOTE: for proper handling of moved/copied files, we have to use
        # the second filename.
        filename = m.group(2)
        svndiff.append("Index: %s" % filename)
        svndiff.append("=" * 67)
        filecount += 1
        logging.info(line)
      else:
        svndiff.append(line)
    if not filecount:
      ErrorExit("No valid patches found in output from hg diff")
    return "\n".join(svndiff) + "\n"

  def GetUnknownFiles(self):
    """Return a list of files unknown to the VCS."""
    args = []
    status = RunShell(["hg", "status", "--rev", self.base_rev, "-u", "."],
                      silent_ok=True)
    unknown_files = []
    for line in status.splitlines():
      st, fn = line.split(" ", 1)
      if st == "?":
        unknown_files.append(fn)
    return unknown_files

  def GetBaseFile(self, filename):
    # "hg status" and "hg cat" both take a path relative to the current subdir
    # rather than to the repo root, but "hg diff" has given us the full path
    # to the repo root.
    base_content = ""
    new_content = None
    is_binary = False
    oldrelpath = relpath = self._GetRelPath(filename)
    # "hg status -C" returns two lines for moved/copied files, one otherwise
    out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath])
    out = out.splitlines()
    # HACK: strip error message about missing file/directory if it isn't in
    # the working copy
    if out[0].startswith('%s: ' % relpath):
      out = out[1:]
    if len(out) > 1:
      # Moved/copied => considered as modified, use old filename to
      # retrieve base contents
      oldrelpath = out[1].strip()
      status = "M"
    else:
      status, _ = out[0].split(' ', 1)
    if status != "A":
      base_content = RunShell(["hg", "cat", "-r", self.base_rev, oldrelpath],
                              silent_ok=True)
      is_binary = "\0" in base_content  # Mercurial's heuristic
    if status != "R":
      new_content = open(relpath, "rb").read()
      is_binary = is_binary or "\0" in new_content
    if is_binary and base_content:
      # Fetch again without converting newlines
      base_content = RunShell(["hg", "cat", "-r", self.base_rev, oldrelpath],
                              silent_ok=True, universal_newlines=False)
    if not is_binary or not self.IsImage(relpath):
      new_content = None
    return base_content, new_content, is_binary, status


# NOTE: The SplitPatch function is duplicated in engine.py, keep them in sync.
def SplitPatch(data):
  """Splits a patch into separate pieces for each file.

  Args:
    data: A string containing the output of svn diff.

  Returns:
    A list of 2-tuple (filename, text) where text is the svn diff output
      pertaining to filename.
  """
  patches = []
  filename = None
  diff = []
  for line in data.splitlines(True):
    new_filename = None
    if line.startswith('Index:'):
      unused, new_filename = line.split(':', 1)
      new_filename = new_filename.strip()
    elif line.startswith('Property changes on:'):
      unused, temp_filename = line.split(':', 1)
      # When a file is modified, paths use '/' between directories, however
      # when a property is modified '\' is used on Windows. Make them the same
      # otherwise the file shows up twice.
      temp_filename = temp_filename.strip().replace('\\', '/')
      if temp_filename != filename:
        # File has property changes but no modifications, create a new diff.
        new_filename = temp_filename
    if new_filename:
      if filename and diff:
        patches.append((filename, ''.join(diff)))
      filename = new_filename
      diff = [line]
      continue
    if diff is not None:
      diff.append(line)
  if filename and diff:
    patches.append((filename, ''.join(diff)))
  return patches
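
# Illustrative sketch (the filenames are placeholders): given svn-style diff
# text of the form
#
#   Index: foo.py
#   ...diff for foo.py...
#   Index: bar.py
#   ...diff for bar.py...
#
# SplitPatch returns [("foo.py", <diff for foo.py>), ("bar.py", <diff for
# bar.py>)], which UploadSeparatePatches below uploads one file at a time.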


def UploadSeparatePatches(issue, rpc_server, patchset, data, options):
  """Uploads a separate patch for each file in the diff output.

  Returns a list of [patch_key, filename] for each file.
  """
  patches = SplitPatch(data)
  rv = []
  for patch in patches:
    if len(patch[1]) > MAX_UPLOAD_SIZE:
      print ("Not uploading the patch for " + patch[0] +
             " because the file is too large.")
      continue
    form_fields = [("filename", patch[0])]
    if not options.download_base:
      form_fields.append(("content_upload", "1"))
    files = [("data", "data.diff", patch[1])]
    ctype, body = EncodeMultipartFormData(form_fields, files)
    url = "/%d/upload_patch/%d" % (int(issue), int(patchset))
    print "Uploading patch for " + patch[0]
    response_body = rpc_server.Send(url, body, content_type=ctype)
    lines = response_body.splitlines()
    if not lines or lines[0] != "OK":
      StatusUpdate("  --> %s" % response_body)
      sys.exit(1)
    rv.append([lines[1], patch[0]])
  return rv


def GuessVCS(options):
  """Helper to guess the version control system.

  This examines the current directory, guesses which VersionControlSystem
  we're using, and returns an instance of the appropriate class. Exit with an
  error if we can't figure it out.

  Returns:
    A VersionControlSystem instance. Exits if the VCS can't be guessed.
  """
  # Mercurial has a command to get the base directory of a repository
  # Try running it, but don't die if we don't have hg installed.
  # NOTE: we try Mercurial first as it can sit on top of an SVN working copy.
  try:
    out, returncode = RunShellWithReturnCode(["hg", "root"])
    if returncode == 0:
      return MercurialVCS(options, out.strip())
  except OSError, (errno, message):
    if errno != 2:  # ENOENT -- they don't have hg installed.
      raise

  # Subversion has a .svn in all working directories.
  if os.path.isdir('.svn'):
    logging.info("Guessed VCS = Subversion")
    return SubversionVCS(options)

  # Git has a command to test if you're in a git tree.
  # Try running it, but don't die if we don't have git installed.
  try:
    out, returncode = RunShellWithReturnCode(["git", "rev-parse",
                                              "--is-inside-work-tree"])
    if returncode == 0:
      return GitVCS(options)
  except OSError, (errno, message):
    if errno != 2:  # ENOENT -- they don't have git installed.
      raise

  ErrorExit(("Could not guess version control system. "
             "Are you in a working copy directory?"))


def RealMain(argv, data=None):
  logging.basicConfig(format=("%(asctime).19s %(levelname)s %(filename)s:"
                              "%(lineno)s %(message)s "))
  os.environ['LC_ALL'] = 'C'
  options, args = parser.parse_args(argv[1:])
  global verbosity
  verbosity = options.verbose
  if verbosity >= 3:
    logging.getLogger().setLevel(logging.DEBUG)
  elif verbosity >= 2:
    logging.getLogger().setLevel(logging.INFO)
  vcs = GuessVCS(options)
  if isinstance(vcs, SubversionVCS):
    # base field is only allowed for Subversion.
    # Note: Fetching base files may become deprecated in future releases.
    base = vcs.GuessBase(options.download_base)
  else:
    base = None
  if not base and options.download_base:
    options.download_base = True
    logging.info("Enabled upload of base file")
  if not options.assume_yes:
    vcs.CheckForUnknownFiles()
  if data is None:
    data = vcs.GenerateDiff(args)
  files = vcs.GetBaseFiles(data)
  if verbosity >= 1:
    print "Upload server:", options.server, "(change with -s/--server)"
  if options.issue:
    prompt = "Message describing this patch set: "
  else:
    prompt = "New issue subject: "
  message = options.message or raw_input(prompt).strip()
  if not message:
    ErrorExit("A non-empty message is required")
  rpc_server = GetRpcServer(options)
  form_fields = [("subject", message)]
  if base:
    form_fields.append(("base", base))
  if options.issue:
    form_fields.append(("issue", str(options.issue)))
  if options.email:
    form_fields.append(("user", options.email))
  if options.reviewers:
    for reviewer in options.reviewers.split(','):
      if "@" in reviewer and not reviewer.split("@")[1].count(".") == 1:
        ErrorExit("Invalid email address: %s" % reviewer)
    form_fields.append(("reviewers", options.reviewers))
  if options.cc:
    for cc in options.cc.split(','):
      if "@" in cc and not cc.split("@")[1].count(".") == 1:
        ErrorExit("Invalid email address: %s" % cc)
    form_fields.append(("cc", options.cc))
  description = options.description
  if options.description_file:
    if options.description:
      ErrorExit("Can't specify description and description_file")
    file = open(options.description_file, 'r')
    description = file.read()
    file.close()
  if description:
    form_fields.append(("description", description))
  # Send a hash of all the base files so the server can determine if a copy
  # already exists in an earlier patchset.
  base_hashes = ""
  for file, info in files.iteritems():
    if not info[0] is None:
      checksum = md5.new(info[0]).hexdigest()
      if base_hashes:
        base_hashes += "|"
      base_hashes += checksum + ":" + file
  form_fields.append(("base_hashes", base_hashes))
  # If we're uploading base files, don't send the email before the uploads, so
  # that it contains the file status.
  if options.send_mail and options.download_base:
    form_fields.append(("send_mail", "1"))
  if not options.download_base:
    form_fields.append(("content_upload", "1"))
  if len(data) > MAX_UPLOAD_SIZE:
    print "Patch is large, so uploading file patches separately."
    uploaded_diff_file = []
    form_fields.append(("separate_patches", "1"))
  else:
    uploaded_diff_file = [("data", "data.diff", data)]
  ctype, body = EncodeMultipartFormData(form_fields, uploaded_diff_file)
  response_body = rpc_server.Send("/upload", body, content_type=ctype)
  if not options.download_base or not uploaded_diff_file:
    lines = response_body.splitlines()
    if len(lines) >= 2:
      msg = lines[0]
      patchset = lines[1].strip()
      patches = [x.split(" ", 1) for x in lines[2:]]
    else:
      msg = response_body
  else:
    msg = response_body
  StatusUpdate(msg)
  if not response_body.startswith("Issue created.") and \
     not response_body.startswith("Issue updated."):
    sys.exit(0)
  issue = msg[msg.rfind("/")+1:]

  if not uploaded_diff_file:
    result = UploadSeparatePatches(issue, rpc_server, patchset, data, options)
    if not options.download_base:
      patches = result

  if not options.download_base:
    vcs.UploadBaseFiles(issue, rpc_server, patches, patchset, options, files)
  if options.send_mail:
    rpc_server.Send("/" + issue + "/mail", payload="")
  return issue


def main():
  try:
    RealMain(sys.argv)
  except KeyboardInterrupt:
    print
    StatusUpdate("Interrupted.")
    sys.exit(1)


if __name__ == "__main__":
  main()