Merge branch 'jk/cherry-pick-reword' into maint
[tgit.git] / git_remote_helpers / git / git.py
bloba383e6c08d5752df1ff42fa25019273dccdaebc8
1 #!/usr/bin/env python
3 """Functionality for interacting with Git repositories.
5 This module provides classes for interfacing with a Git repository.
6 """
8 import os
9 import re
10 import time
11 from binascii import hexlify
12 from cStringIO import StringIO
13 import unittest
15 from git_remote_helpers.util import debug, error, die, start_command, run_command
def get_git_dir ():
    """Ask git where the GIT_DIR of the current repository lives.

    Runs 'git rev-parse --git-dir' and returns its output with the
    surrounding whitespace removed.  A non-zero exit status is reported
    through die().
    """
    status, stdout_text, stderr_text = run_command(
        ("git", "rev-parse", "--git-dir"))
    if status:
        die("Failed to retrieve git dir")
    # A successful run is expected to leave stderr empty.
    assert not stderr_text
    return stdout_text.strip()
def parse_git_config ():
    """Return a dict containing the parsed version of 'git config -l'.

    The configuration is read with 'git config -z -l', which terminates
    every entry with a NUL byte and separates the key from its value
    with a newline.  A key that was configured without any value is
    printed without the newline separator; such keys are mapped to the
    empty string (which git_config_bool() interprets as True, matching
    git's own treatment of valueless boolean keys).

    When a key occurs more than once, the last occurrence wins.
    A non-zero exit status from git is reported through die().
    """
    exit_code, output, errors = run_command(("git", "config", "-z", "-l"))
    if exit_code:
        die("Failed to retrieve git configuration")
    assert not errors

    config = {}
    for entry in output.split("\0"):
        if not entry:
            continue  # Skip the empty piece after the final NUL
        fields = entry.split("\n", 1)
        if len(fields) == 1:
            # Valueless key: there is no "\n<value>" part at all.
            fields.append("")
        config[fields[0]] = fields[1]
    return config
def git_config_bool (value):
    """Interpret a git config value as a boolean.

    The value is converted to a string, stripped and lower-cased
    before being compared against the known spellings.  The empty
    string counts as True; the string "none" (and hence None itself)
    counts as False.

    Raise ValueError if the given string was not recognized as a
    boolean value.

    """
    spellings = (
        (("true", "1", "yes", "on", ""), True),
        (("false", "0", "no", "off", "none"), False),
    )
    text = str(value).strip().lower()
    for words, result in spellings:
        if text in words:
            return result
    raise ValueError("Failed to parse '%s' into a boolean value" % (value))
def valid_git_ref (ref_name):
    """Return True iff the given ref name is a valid git ref name.

    The following rules are enforced:
    - the name is not empty and is not the single character '@'
    - it neither starts nor ends with '/', and has no '//' in it
    - no path component starts with '.' or ends with '.lock'
    - it does not end with '.', and contains neither '..' nor '@{'
    - it contains no backslash, ASCII control character, DEL, space,
      or any of '~', '^', ':', '?', '*', '['

    One-level names (without any '/') are accepted.

    """
    # The following is a reimplementation of the git check-ref-format
    # command.  The rules were derived from the git check-ref-format(1)
    # manual page.  This code should be replaced by a call to
    # check_ref_format() in the git library, when such is available.
    if not ref_name or ref_name == "@":
        return False
    if ref_name.startswith('/') or \
       ref_name.endswith('/') or \
       ref_name.startswith('.') or \
       ref_name.endswith('.') or \
       ref_name.endswith('.lock'):
        return False
    # Sequences that are forbidden anywhere in the name.  '/.' catches
    # components starting with a dot, '.lock/' catches inner components
    # ending in '.lock'.
    for forbidden in ('//', '/.', '..', '.lock/', '@{', '\\'):
        if forbidden in ref_name:
            return False
    for c in ref_name:
        if ord(c) < 0x20 or ord(c) == 0x7f or c in " ~^:?*[":
            return False
    return True
class GitObjectFetcher(object):

    """Provide parsed access to 'git cat-file --batch'.

    This provides a read-only interface to the Git object database.

    Object names are queued with push() and submitted one at a time to
    a long-running 'git cat-file --batch' process; process() reads the
    replies and hands every parsed object to the callback that was
    registered for it.

    """

    def __init__ (self):
        """Initiate a 'git cat-file --batch' session."""
        self.queue = []        # List of (object name, callback) to be submitted
        self.in_transit = None # (object name, callback) currently in transit

        # 'git cat-file --batch' produces binary output which is likely
        # to be corrupted by the default "rU"-mode pipe opened by
        # start_command. (Mode == "rU" does universal new-line
        # conversion, which mangles carriage returns.) Therefore, we
        # open an explicitly binary-safe pipe for transferring the
        # output from 'git cat-file --batch'.
        pipe_r_fd, pipe_w_fd = os.pipe()
        pipe_r = os.fdopen(pipe_r_fd, "rb")
        pipe_w = os.fdopen(pipe_w_fd, "wb")
        self.proc = start_command(("git", "cat-file", "--batch"),
                                  stdout = pipe_w)
        self.f = pipe_r

    def __del__ (self):
        """Verify completed communication with 'git cat-file --batch'."""
        assert not self.queue
        assert self.in_transit is None
        self.proc.stdin.close()
        assert self.proc.wait() == 0 # Zero exit code
        assert self.f.read() == "" # No remaining output

    def _submit_next_object (self):
        """Submit queue items to the 'git cat-file --batch' process.

        If there are items in the queue, and there is currently no item
        in 'transit', then pop the first item off the queue, and submit
        its object name (one name per line) to the process.

        """
        if self.queue and self.in_transit is None:
            self.in_transit = self.queue.pop(0)
            self.proc.stdin.write("%s\n" % (self.in_transit[0]))

    def push (self, obj, callback):
        """Push the given object name onto the queue.

        The given callback function will at some point in the future
        be called exactly once with the following arguments:
        - self - this GitObjectFetcher instance
        - obj  - the object name provided to push()
        - sha1 - the SHA1 of the object, if 'None' obj is missing
        - t    - the type of the object (tag/commit/tree/blob)
        - size - the size of the object in bytes
        - data - the object contents

        """
        self.queue.append((obj, callback))
        self._submit_next_object() # (Re)start queue processing

    def process_next_entry (self):
        """Read the next entry off the queue and invoke callback.

        The reply is either '<obj> missing' or a header line of the
        form '<sha1> <type> <size>' followed by <size> bytes of data
        and a terminating newline.

        """
        obj, cb = self.in_transit
        self.in_transit = None
        header = self.f.readline()
        if header == "%s missing\n" % (obj):
            cb(self, obj, None, None, None, None)
            return
        sha1, t, size = header.split(" ")
        assert len(sha1) == 40
        assert t in ("tag", "commit", "tree", "blob")
        assert size.endswith("\n")
        size = int(size.strip())
        data = self.f.read(size)
        assert self.f.read(1) == "\n"
        cb(self, obj, sha1, t, size, data)
        self._submit_next_object()

    def process (self):
        """Process the current queue until empty."""
        while self.in_transit is not None:
            self.process_next_entry()

    # High-level convenience methods:

    def get_sha1 (self, objspec):
        """Return the SHA1 of the object specified by 'objspec'.

        Return None if 'objspec' does not specify an existing object.

        """
        class _ObjHandler(object):
            """Helper class for getting the returned SHA1."""
            def __init__ (self, parser):
                self.parser = parser
                self.sha1 = None

            def __call__ (self, parser, obj, sha1, t, size, data):
                # FIXME: Many unused arguments. Could this be cheaper?
                assert parser == self.parser
                self.sha1 = sha1

        handler = _ObjHandler(self)
        self.push(objspec, handler)
        self.process()
        return handler.sha1

    def open_obj (self, objspec):
        """Return a file object wrapping the contents of a named object.

        The caller is responsible for calling .close() on the returned
        file object.

        Raise KeyError if 'objspec' does not exist in the repo.

        """
        class _ObjHandler(object):
            """Helper class for parsing the returned git object."""
            def __init__ (self, parser):
                """Set up helper."""
                self.parser = parser
                self.contents = StringIO()
                self.err = None

            def __call__ (self, parser, obj, sha1, t, size, data):
                """Git object callback (see GitObjectFetcher documentation)."""
                assert parser == self.parser
                if not sha1: # Missing object
                    self.err = "Missing object '%s'" % obj
                else:
                    assert size == len(data)
                    self.contents.write(data)

        handler = _ObjHandler(self)
        self.push(objspec, handler)
        self.process()
        if handler.err:
            raise KeyError(handler.err)
        handler.contents.seek(0)
        return handler.contents

    def walk_tree (self, tree_objspec, callback, prefix = ""):
        """Recursively walk the given Git tree object.

        Recursively walk all subtrees of the given tree object, and
        invoke the given callback passing three arguments:
        (path, mode, data) with the path, permission bits, and contents
        of all the blobs found in the entire tree structure.

        """
        class _ObjHandler(object):
            """Helper class for walking a git tree structure."""
            def __init__ (self, parser, cb, path, mode = None):
                """Set up helper."""
                self.parser = parser
                self.cb = cb
                self.path = path
                self.mode = mode
                self.err = None

            def parse_tree (self, treedata):
                """Parse tree object data, yield tree entries.

                Each tree entry is a 3-tuple (mode, sha1, path)

                self.path is prepended to all paths yielded
                from this method.

                """
                pos = 0
                end = len(treedata)
                while pos < end:
                    # An entry is "<mode> <name>\0<20 raw SHA1 bytes>".
                    # The mode is not fixed-width: files are stored
                    # as e.g. "100644" but subtrees as "40000", so the
                    # separators must be searched for.
                    sp = treedata.find(" ", pos)
                    assert sp > pos
                    mode = int(treedata[pos:sp], 10)
                    # Turn 100xxx into xxx
                    if mode > 100000:
                        mode -= 100000
                    nul = treedata.find("\0", sp + 1)
                    assert nul > sp + 1
                    path = treedata[sp + 1:nul]
                    sha1 = hexlify(treedata[nul + 1:nul + 21])
                    yield (mode, sha1, self.path + path)
                    pos = nul + 21

            def __call__ (self, parser, obj, sha1, t, size, data):
                """Git object callback (see GitObjectFetcher documentation)."""
                assert parser == self.parser
                if not sha1: # Missing object
                    self.err = "Missing object '%s'" % (obj)
                    return
                assert size == len(data)
                if t == "tree":
                    if self.path:
                        self.path += "/"
                    # Recurse into all blobs and subtrees
                    for m, s, p in self.parse_tree(data):
                        parser.push(s,
                                    self.__class__(self.parser, self.cb, p, m))
                elif t == "blob":
                    self.cb(self.path, self.mode, data)
                else:
                    raise ValueError("Unknown object type '%s'" % (t))

        self.push(tree_objspec, _ObjHandler(self, callback, prefix))
        self.process()
class GitRefMap(object):

    """Dictionary-like view of Git ref names -> Git object names.

    Lookups are answered through the given object fetcher and
    remembered, so every ref is resolved at most once.

    """

    def __init__ (self, obj_fetcher):
        """Set up an empty ref -> object map backed by obj_fetcher."""
        self._cache = {} # dict: refname -> objname
        self.obj_fetcher = obj_fetcher

    def _load (self, ref):
        """Resolve the given ref, consulting the cache first.

        The object name the ref points to (None when it cannot be
        resolved) is recorded in this mapping and returned.

        """
        try:
            return self._cache[ref]
        except KeyError:
            objname = self.obj_fetcher.get_sha1(ref)
            self._cache[ref] = objname
            return objname

    def __contains__ (self, refname):
        """Tell whether the given refname resolves to an object."""
        if self._load(refname):
            return True
        return False

    def __getitem__ (self, refname):
        """Look up the object name for refname; KeyError if unknown."""
        objname = self._load(refname)
        if objname is not None:
            return objname
        raise KeyError("Unknown ref '%s'" % (refname))

    def get (self, refname, default = None):
        """Look up the object name for refname, or return default."""
        objname = self._load(refname)
        if objname is not None:
            return objname
        return default
class GitFICommit(object):

    """Encapsulate the data in a Git fast-import commit command.

    The commit metadata is stored as given.  Path operations are
    collected in self.pathops as tuples of strings, one tuple per
    operation, ready to be joined with single spaces into the
    corresponding fast-import command line.

    """

    SHA1RE = re.compile(r'^[0-9a-f]{40}$')

    @classmethod
    def parse_mode (cls, mode):
        """Verify the given git file mode, and return it as a string.

        Only the integers 644, 755, 100644, 100755 and 120000 are
        accepted; anything else fails an assertion.

        """
        assert mode in (644, 755, 100644, 100755, 120000)
        return "%i" % (mode)

    @classmethod
    def parse_objname (cls, objname):
        """Return the given object name (or mark number) as a string.

        A positive integer is taken to be a mark number and rendered
        as ':<n>'.  Anything else must be a 40-digit lowercase hex
        SHA1, which is returned unchanged.

        """
        if isinstance(objname, int): # Object name is a mark number
            assert objname > 0
            return ":%i" % (objname)

        # No existence check is done, only checks for valid format
        assert cls.SHA1RE.match(objname) # Object name is valid SHA1
        return objname

    @classmethod
    def quote_path (cls, path):
        """Return a quoted version of the given path.

        Backslashes, newlines and double quotes are backslash-escaped
        and the result is wrapped in double quotes.

        """
        path = path.replace("\\", "\\\\")
        path = path.replace("\n", "\\n")
        path = path.replace('"', '\\"')
        return '"%s"' % (path)

    @classmethod
    def parse_path (cls, path):
        """Verify that the given path is valid, and quote it, if needed."""
        assert not isinstance(path, int) # Cannot be a mark number

        # These checks verify the rules on the fast-import man page
        assert not path.count("//")
        assert not path.endswith("/")
        assert not path.startswith("/")
        assert not path.count("/./")
        assert not path.count("/../")
        assert not path.endswith("/.")
        assert not path.endswith("/..")
        assert not path.startswith("./")
        assert not path.startswith("../")

        # Quoting is only applied when a character that needs escaping
        # is present.
        if path.count('"') + path.count('\n') + path.count('\\'):
            return cls.quote_path(path)
        return path

    def __init__ (self, name, email, timestamp, timezone, message):
        """Create a new Git fast-import commit, with the given metadata."""
        self.name = name
        self.email = email
        self.timestamp = timestamp
        self.timezone = timezone
        self.message = message
        self.pathops = [] # List of path operations in this commit

    def modify (self, mode, blobname, path):
        """Add a file modification to this Git fast-import commit."""
        self.pathops.append(("M",
                             self.parse_mode(mode),
                             self.parse_objname(blobname),
                             self.parse_path(path)))

    def delete (self, path):
        """Add a file deletion to this Git fast-import commit."""
        self.pathops.append(("D", self.parse_path(path)))

    def copy (self, path, newpath):
        """Add a file copy to this Git fast-import commit."""
        self.pathops.append(("C",
                             self.parse_path(path),
                             self.parse_path(newpath)))

    def rename (self, path, newpath):
        """Add a file rename to this Git fast-import commit."""
        self.pathops.append(("R",
                             self.parse_path(path),
                             self.parse_path(newpath)))

    def note (self, blobname, commit):
        """Add a note object to this Git fast-import commit."""
        self.pathops.append(("N",
                             self.parse_objname(blobname),
                             self.parse_objname(commit)))

    def deleteall (self):
        """Delete all files in this Git fast-import commit."""
        # Must be a 1-tuple like every other entry: a bare string
        # would be joined character by character ("d e l e t e a l l")
        # when the operations are written out with " ".join(op).
        self.pathops.append(("deleteall",))
class TestGitFICommit(unittest.TestCase):

    """Selftests for the GitFICommit validation helpers."""

    def test_basic (self):
        """GitFICommit basic selftests.

        Only defines a local helper; no checks are performed here.

        """

        def expect_fail (method, data):
            """Check that calling method(data) trips an AssertionError."""
            try:
                method(data)
            except AssertionError:
                pass
            else:
                raise AssertionError("Failed test for invalid data '%s(%s)'" %
                                     (method.__name__, repr(data)))

    def test_parse_mode (self):
        """GitFICommit.parse_mode() selftests."""
        for good_mode in (644, 755, 100644, 100755, 120000):
            self.assertEqual(GitFICommit.parse_mode(good_mode),
                             str(good_mode))
        for bad_mode in (0, 123, 600, "644", "abc"):
            self.assertRaises(AssertionError,
                              GitFICommit.parse_mode, bad_mode)

    def test_parse_objname (self):
        """GitFICommit.parse_objname() selftests."""
        parse = GitFICommit.parse_objname
        self.assertEqual(parse(1), ":1")
        for bad_mark in (0, -1):
            self.assertRaises(AssertionError, parse, bad_mark)
        for good_sha1 in ("0123456789" * 4, "2468abcdef" * 4):
            self.assertEqual(parse(good_sha1), good_sha1)
        self.assertRaises(AssertionError, parse, "abcdefghij" * 4)

    def test_parse_path (self):
        """GitFICommit.parse_path() selftests."""
        parse = GitFICommit.parse_path
        self.assertEqual(parse("foo/bar"), "foo/bar")
        self.assertEqual(parse("path/with\n and \" in it"),
                         '"path/with\\n and \\" in it"')
        rejected = (
            1, 0, -1,           # Mark numbers are not paths
            "foo//bar",
            "foo/bar/",
            "/foo/bar",
            "foo/./bar",
            "foo/../bar",
            "foo/bar/.",
            "foo/bar/..",
            "./foo/bar",
            "../foo/bar",
        )
        for bad_path in rejected:
            self.assertRaises(AssertionError, parse, bad_path)
class GitFastImport(object):

    """Encapsulate communication with git fast-import.

    Commands are written to the file object given at construction.
    Marks are handed out sequentially, starting right after the
    'last_mark' passed to the constructor.

    """

    def __init__ (self, f, obj_fetcher, last_mark = 0):
        """Set up self to communicate with a fast-import process through f."""
        self.f = f  # File object where fast-import stream is written
        self.obj_fetcher = obj_fetcher  # GitObjectFetcher instance
        self.next_mark = last_mark + 1  # Next mark number
        self.refs = set()  # Keep track of the refnames we've seen

    def comment (self, s):
        """Write the given comment in the fast-import stream."""
        assert "\n" not in s, "Malformed comment: '%s'" % (s)
        self.f.write("# %s\n" % (s))

    def commit (self, ref, commitdata):
        """Make a commit on the given ref, with the given GitFICommit.

        The first time a ref is committed to through this object, and
        only if '<ref>^0' resolves to an existing object, the commit is
        based on the current tip of that ref by emitting a 'from' line.

        Return the mark number identifying this commit.

        """
        self.f.write("""\
commit %(ref)s
mark :%(mark)i
committer %(name)s <%(email)s> %(timestamp)i %(timezone)s
data %(msgLength)i
%(msg)s
""" % {
            'ref': ref,
            'mark': self.next_mark,
            'name': commitdata.name,
            'email': commitdata.email,
            'timestamp': commitdata.timestamp,
            'timezone': commitdata.timezone,
            'msgLength': len(commitdata.message),
            'msg': commitdata.message,
        })

        if ref not in self.refs:
            self.refs.add(ref)
            parent = ref + "^0"
            if self.obj_fetcher.get_sha1(parent):
                self.f.write("from %s\n" % (parent))

        for op in commitdata.pathops:
            if isinstance(op, (tuple, list)):
                op = " ".join(op)
            # Anything else (e.g. a plain "deleteall" string) is
            # already a complete command line; joining it would
            # insert a space between every character.
            self.f.write(op)
            self.f.write("\n")
        self.f.write("\n")
        retval = self.next_mark
        self.next_mark += 1
        return retval

    def blob (self, data):
        """Import the given blob.

        Return the mark number identifying this blob.

        """
        self.f.write("blob\nmark :%i\ndata %i\n%s\n" %
                     (self.next_mark, len(data), data))
        retval = self.next_mark
        self.next_mark += 1
        return retval

    def reset (self, ref, objname):
        """Reset the given ref to point at the given Git object."""
        self.f.write("reset %s\nfrom %s\n\n" %
                     (ref, GitFICommit.parse_objname(objname)))
        self.refs.add(ref)
class GitNotes(object):

    """Encapsulate access to Git notes.

    Simulates a dictionary of object name (SHA1) -> Git note mappings.

    Existing notes are read through the given object fetcher.  New
    notes are written in two steps: import_note() emits the note blob
    and remembers it, commit_notes() emits the commit that attaches
    all remembered blobs to their objects.

    """

    def __init__ (self, notes_ref, obj_fetcher):
        """Create a new Git notes interface, bound to the given notes ref."""
        self.notes_ref = notes_ref
        self.obj_fetcher = obj_fetcher # Used to get objects from repo
        self.imports = [] # list: (objname, note data blob name) tuples

    def __del__ (self):
        """Verify that self.commit_notes() was called before destruction."""
        if self.imports:
            error("Missing call to self.commit_notes().")
            error("%i notes are not committed!", len(self.imports))

    def _load (self, objname):
        """Return the note data associated with the given git object.

        The note data is returned in string form. If no note is found
        for the given object, None is returned.

        """
        try:
            f = self.obj_fetcher.open_obj("%s:%s" % (self.notes_ref, objname))
            ret = f.read()
            f.close()
        except KeyError:
            ret = None
        return ret

    def __getitem__ (self, objname):
        """Return the note contents associated with the given object.

        Raise KeyError if given object has no associated note.

        """
        blobdata = self._load(objname)
        if blobdata is None:
            raise KeyError("Object '%s' has no note" % (objname))
        return blobdata

    def get (self, objname, default = None):
        """Return the note contents associated with the given object.

        Return given default if given object has no associated note.

        """
        blobdata = self._load(objname)
        if blobdata is None:
            return default
        return blobdata

    def import_note (self, objname, data, gfi):
        """Tell git fast-import to store data as a note for objname.

        This method uses the given GitFastImport object to create a
        blob containing the given note data.  Also an entry mapping the
        given object name to the created blob is stored until
        commit_notes() is called.

        Note that this method only works if it is later followed by a
        call to self.commit_notes() (which produces the note commit
        that refers to the blob produced here).

        """
        if not data.endswith("\n"):
            data += "\n"
        gfi.comment("Importing note for object %s" % (objname))
        mark = gfi.blob(data)
        self.imports.append((objname, mark))

    def commit_notes (self, gfi, author, message):
        """Produce a git fast-import note commit for the imported notes.

        This method uses the given GitFastImport object to create a
        commit on the notes ref, introducing the notes previously
        submitted to import_note().

        'author' is indexed as (name, email).  Both the annotated
        objects and the note blobs are required to be mark numbers
        (positive integers).  Nothing is written when there are no
        pending notes.

        """
        if not self.imports:
            return
        # fast-import's raw date format requires a signed UTC offset
        # ("+HHMM" or "-HHMM"); an unsigned "0000" is not accepted.
        commitdata = GitFICommit(author[0], author[1],
                                 time.time(), "+0000", message)
        for objname, blobname in self.imports:
            assert isinstance(objname, int) and objname > 0
            assert isinstance(blobname, int) and blobname > 0
            commitdata.note(blobname, objname)
        gfi.commit(self.notes_ref, commitdata)
        self.imports = []
class GitCachedNotes(GitNotes):

    """Git notes access with a local cache in front of the lookups.

    Only use this class if no caching is done at a higher level.

    Simulates a dictionary of object name (SHA1) -> Git note mappings.

    """

    def __init__ (self, notes_ref, obj_fetcher):
        """Initialise the underlying GitNotes and start with an empty cache."""
        GitNotes.__init__(self, notes_ref, obj_fetcher)
        self._cache = {} # Cache: object name -> note data

    def __del__ (self):
        """Delegate to GitNotes' destructor so its check still runs."""
        GitNotes.__del__(self)

    def _load (self, objname):
        """Answer from the cache, falling back to GitNotes._load() once."""
        try:
            return self._cache[objname]
        except KeyError:
            note = GitNotes._load(self, objname)
            self._cache[objname] = note
            return note

    def import_note (self, objname, data, gfi):
        """Record the note in the cache, then import it via GitNotes.

        The note text is newline-terminated before it is cached.  The
        object must not have been looked up or imported before.

        """
        if data[-1:] != "\n":
            data = data + "\n"
        assert objname not in self._cache
        self._cache[objname] = data
        GitNotes.import_note(self, objname, data, gfi)
# Run the selftests when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()