Tools/scripts/ndiff.py

   1 #! /usr/bin/env python
   2
   3 # Module ndiff version 1.4.0
   4 # Released to the public domain 27-Mar-1999,
   5 # by Tim Peters (tim_one@email.msn.com).
   6
   7 # Provided as-is; use at your own risk; no warranty; no promises; enjoy!
   8
   9 """ndiff [-q] file1 file2
  10     or
  11 ndiff (-r1 | -r2) < ndiff_output > file1_or_file2
  12
  13 Print a human-friendly file difference report to stdout.  Both inter-
  14 and intra-line differences are noted.  In the second form, recreate file1
  15 (-r1) or file2 (-r2) on stdout, from an ndiff report on stdin.
  16
  17 In the first form, if -q ("quiet") is not specified, the first two lines
  18 of output are
  19
  20 -: file1
  21 +: file2
  22
  23 Each remaining line begins with a two-letter code:
  24
  25     "- "    line unique to file1
  26     "+ "    line unique to file2
  27     "  "    line common to both files
  28     "? "    line not present in either input file
  29
  30 Lines beginning with "? " attempt to guide the eye to intraline
  31 differences, and were not present in either input file.  These lines can
  32 be confusing if the source files contain tab characters.
  33
  34 The first file can be recovered by retaining only lines that begin with
  35 "  " or "- ", and deleting those 2-character prefixes; use ndiff with -r1.
  36
  37 The second file can be recovered similarly, but by retaining only "  "
  38 and "+ " lines; use ndiff with -r2; or, on Unix, the second file can be
  39 recovered by piping the output through
  40
  41     sed -n '/^[+ ] /s/^..//p'
  42
  43 See module comments for details and programmatic interface.
  44 """
  45
  46 __version__ = 1, 4, 0
  47
  48 # SequenceMatcher tries to compute a "human-friendly diff" between
  49 # two sequences (chiefly picturing a file as a sequence of lines,
  50 # and a line as a sequence of characters, here).  Unlike e.g. UNIX(tm)
  51 # diff, the fundamental notion is the longest *contiguous* & junk-free
  52 # matching subsequence.  That's what catches peoples' eyes.  The
  53 # Windows(tm) windiff has another interesting notion, pairing up elements
  54 # that appear uniquely in each sequence.  That, and the method here,
  55 # appear to yield more intuitive difference reports than does diff.  This
  56 # method appears to be the least vulnerable to synching up on blocks
  57 # of "junk lines", though (like blank lines in ordinary text files,
  58 # or maybe "<P>" lines in HTML files).  That may be because this is
  59 # the only method of the 3 that has a *concept* of "junk" <wink>.
  60 #
  61 # Note that ndiff makes no claim to produce a *minimal* diff.  To the
  62 # contrary, minimal diffs are often counter-intuitive, because they
  63 # synch up anywhere possible, sometimes accidental matches 100 pages
  64 # apart.  Restricting synch points to contiguous matches preserves some
  65 # notion of locality, at the occasional cost of producing a longer diff.
  66 #
  67 # With respect to junk, an earlier version of ndiff simply refused to
  68 # *start* a match with a junk element.  The result was cases like this:
  69 #     before: private Thread currentThread;
  70 #     after:  private volatile Thread currentThread;
  71 # If you consider whitespace to be junk, the longest contiguous match
  72 # not starting with junk is "e Thread currentThread".  So ndiff reported
  73 # that "e volatil" was inserted between the 't' and the 'e' in "private".
  74 # While an accurate view, to people that's absurd.  The current version
  75 # looks for matching blocks that are entirely junk-free, then extends the
  76 # longest one of those as far as possible but only with matching junk.
  77 # So now "currentThread" is matched, then extended to suck up the
  78 # preceding blank; then "private" is matched, and extended to suck up the
  79 # following blank; then "Thread" is matched; and finally ndiff reports
  80 # that "volatile " was inserted before "Thread".  The only quibble
  81 # remaining is that perhaps it was really the case that " volatile"
  82 # was inserted after "private".  I can live with that <wink>.
  83 #
  84 # NOTE on junk:  the module-level names
  85 #    IS_LINE_JUNK
  86 #    IS_CHARACTER_JUNK
  87 # can be set to any functions you like.  The first one should accept
  88 # a single string argument, and return true iff the string is junk.
  89 # The default is whether the regexp r"\s*#?\s*$" matches (i.e., a
  90 # line without visible characters, except for at most one splat).
  91 # The second should accept a string of length 1 etc.  The default is
  92 # whether the character is a blank or tab (note: bad idea to include
  93 # newline in this!).
  94 #
  95 # After setting those, you can call fcompare(f1name, f2name) with the
  96 # names of the files you want to compare.  The difference report
  97 # is sent to stdout.  Or you can call main(args), passing what would
  98 # have been in sys.argv[1:] had the cmd-line form been used.
  99
 100 import string
 101 TRACE = 0
 102
 103 # define what "junk" means
 104 import re
 105
 106 def IS_LINE_JUNK(line, pat=re.compile(r"\s*#?\s*$").match):
 107     return pat(line) is not None
 108
 109 def IS_CHARACTER_JUNK(ch, ws=" \t"):
 110     return ch in ws
 111
 112 del re
 113
 114 class SequenceMatcher:
 115     def __init__(self, isjunk=None, a='', b=''):
 116         # Members:
 117         # a
 118         #      first sequence
 119         # b
 120         #      second sequence; differences are computed as "what do
 121         #      we need to do to 'a' to change it into 'b'?"
 122         # b2j
 123         #      for x in b, b2j[x] is a list of the indices (into b)
 124         #      at which x appears; junk elements do not appear
 125         # b2jhas
 126         #      b2j.has_key
 127         # fullbcount
 128         #      for x in b, fullbcount[x] == the number of times x
 129         #      appears in b; only materialized if really needed (used
 130         #      only for computing quick_ratio())
 131         # matching_blocks
 132         #      a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
 133         #      ascending & non-overlapping in i and in j; terminated by
 134         #      a dummy (len(a), len(b), 0) sentinel
 135         # opcodes
 136         #      a list of (tag, i1, i2, j1, j2) tuples, where tag is
 137         #      one of
 138         #          'replace'   a[i1:i2] should be replaced by b[j1:j2]
 139         #          'delete'    a[i1:i2] should be deleted
 140         #          'insert'    b[j1:j2] should be inserted
 141         #          'equal'     a[i1:i2] == b[j1:j2]
 142         # isjunk
 143         #      a user-supplied function taking a sequence element and
 144         #      returning true iff the element is "junk" -- this has
 145         #      subtle but helpful effects on the algorithm, which I'll
 146         #      get around to writing up someday <0.9 wink>.
 147         #      DON'T USE!  Only __chain_b uses this.  Use isbjunk.
 148         # isbjunk
 149         #      for x in b, isbjunk(x) == isjunk(x) but much faster;
 150         #      it's really the has_key method of a hidden dict.
 151         #      DOES NOT WORK for x in a!
 152
 153         self.isjunk = isjunk
 154         self.a = self.b = None
 155         self.set_seqs(a, b)
 156
 157     def set_seqs(self, a, b):
 158         self.set_seq1(a)
 159         self.set_seq2(b)
 160
 161     def set_seq1(self, a):
 162         if a is self.a:
 163             return
 164         self.a = a
 165         self.matching_blocks = self.opcodes = None
 166
 167     def set_seq2(self, b):
 168         if b is self.b:
 169             return
 170         self.b = b
 171         self.matching_blocks = self.opcodes = None
 172         self.fullbcount = None
 173         self.__chain_b()
 174
 175     # For each element x in b, set b2j[x] to a list of the indices in
 176     # b where x appears; the indices are in increasing order; note that
 177     # the number of times x appears in b is len(b2j[x]) ...
 178     # when self.isjunk is defined, junk elements don't show up in this
 179     # map at all, which stops the central find_longest_match method
 180     # from starting any matching block at a junk element ...
 181     # also creates the fast isbjunk function ...
 182     # note that this is only called when b changes; so for cross-product
 183     # kinds of matches, it's best to call set_seq2 once, then set_seq1
 184     # repeatedly
 185
 186     def __chain_b(self):
 187         # Because isjunk is a user-defined (not C) function, and we test
 188         # for junk a LOT, it's important to minimize the number of calls.
 189         # Before the tricks described here, __chain_b was by far the most
 190         # time-consuming routine in the whole module!  If anyone sees
 191         # Jim Roskind, thank him again for profile.py -- I never would
 192         # have guessed that.
 193         # The first trick is to build b2j ignoring the possibility
 194         # of junk.  I.e., we don't call isjunk at all yet.  Throwing
 195         # out the junk later is much cheaper than building b2j "right"
 196         # from the start.
 197         b = self.b
 198         self.b2j = b2j = {}
 199         self.b2jhas = b2jhas = b2j.has_key
 200         for i in xrange(len(b)):
 201             elt = b[i]
 202             if b2jhas(elt):
 203                 b2j[elt].append(i)
 204             else:
 205                 b2j[elt] = [i]
 206
 207         # Now b2j.keys() contains elements uniquely, and especially when
 208         # the sequence is a string, that's usually a good deal smaller
 209         # than len(string).  The difference is the number of isjunk calls
 210         # saved.
 211         isjunk, junkdict = self.isjunk, {}
 212         if isjunk:
 213             for elt in b2j.keys():
 214                 if isjunk(elt):
 215                     junkdict[elt] = 1   # value irrelevant; it's a set
 216                     del b2j[elt]
 217
 218         # Now for x in b, isjunk(x) == junkdict.has_key(x), but the
 219         # latter is much faster.  Note too that while there may be a
 220         # lot of junk in the sequence, the number of *unique* junk
 221         # elements is probably small.  So the memory burden of keeping
 222         # this dict alive is likely trivial compared to the size of b2j.
 223         self.isbjunk = junkdict.has_key
 224
 225     def find_longest_match(self, alo, ahi, blo, bhi):
 226         """Find longest matching block in a[alo:ahi] and b[blo:bhi].
 227
 228         If isjunk is not defined:
 229
 230         Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
 231             alo <= i <= i+k <= ahi
 232             blo <= j <= j+k <= bhi
 233         and for all (i',j',k') meeting those conditions,
 234             k >= k'
 235             i <= i'
 236             and if i == i', j <= j'
 237         In other words, of all maximal matching blocks, return one
 238         that starts earliest in a, and of all those maximal matching
 239         blocks that start earliest in a, return the one that starts
 240         earliest in b.
 241
 242         If isjunk is defined, first the longest matching block is
 243         determined as above, but with the additional restriction that
 244         no junk element appears in the block.  Then that block is
 245         extended as far as possible by matching (only) junk elements on
 246         both sides.  So the resulting block never matches on junk except
 247         as identical junk happens to be adjacent to an "interesting"
 248         match.
 249
 250         If no blocks match, return (alo, blo, 0).
 251         """
 252
 253         # CAUTION:  stripping common prefix or suffix would be incorrect.
 254         # E.g.,
 255         #    ab
 256         #    acab
 257         # Longest matching block is "ab", but if common prefix is
 258         # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
 259         # strip, so ends up claiming that ab is changed to acab by
 260         # inserting "ca" in the middle.  That's minimal but unintuitive:
 261         # "it's obvious" that someone inserted "ac" at the front.
 262         # Windiff ends up at the same place as diff, but by pairing up
 263         # the unique 'b's and then matching the first two 'a's.
 264
 265         a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
 266         besti, bestj, bestsize = alo, blo, 0
 267         # find longest junk-free match
 268         # during an iteration of the loop, j2len[j] = length of longest
 269         # junk-free match ending with a[i-1] and b[j]
 270         j2len = {}
 271         nothing = []
 272         for i in xrange(alo, ahi):
 273             # look at all instances of a[i] in b; note that because
 274             # b2j has no junk keys, the loop is skipped if a[i] is junk
 275             j2lenget = j2len.get
 276             newj2len = {}
 277             for j in b2j.get(a[i], nothing):
 278                 # a[i] matches b[j]
 279                 if j < blo:
 280                     continue
 281                 if j >= bhi:
 282                     break
 283                 k = newj2len[j] = j2lenget(j-1, 0) + 1
 284                 if k > bestsize:
 285                     besti, bestj, bestsize = i-k+1, j-k+1, k
 286             j2len = newj2len
 287
 288         # Now that we have a wholly interesting match (albeit possibly
 289         # empty!), we may as well suck up the matching junk on each
 290         # side of it too.  Can't think of a good reason not to, and it
 291         # saves post-processing the (possibly considerable) expense of
 292         # figuring out what to do with it.  In the case of an empty
 293         # interesting match, this is clearly the right thing to do,
 294         # because no other kind of match is possible in the regions.
 295         while besti > alo and bestj > blo and \
 296               isbjunk(b[bestj-1]) and \
 297               a[besti-1] == b[bestj-1]:
 298             besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
 299         while besti+bestsize < ahi and bestj+bestsize < bhi and \
 300               isbjunk(b[bestj+bestsize]) and \
 301               a[besti+bestsize] == b[bestj+bestsize]:
 302             bestsize = bestsize + 1
 303
 304         if TRACE:
 305             print "get_matching_blocks", alo, ahi, blo, bhi
 306             print "    returns", besti, bestj, bestsize
 307         return besti, bestj, bestsize
 308
 309     def get_matching_blocks(self):
 310         if self.matching_blocks is not None:
 311             return self.matching_blocks
 312         self.matching_blocks = []
 313         la, lb = len(self.a), len(self.b)
 314         self.__helper(0, la, 0, lb, self.matching_blocks)
 315         self.matching_blocks.append( (la, lb, 0) )
 316         if TRACE:
 317             print '*** matching blocks', self.matching_blocks
 318         return self.matching_blocks
 319
 320     # builds list of matching blocks covering a[alo:ahi] and
 321     # b[blo:bhi], appending them in increasing order to answer
 322
 323     def __helper(self, alo, ahi, blo, bhi, answer):
 324         i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
 325         # a[alo:i] vs b[blo:j] unknown
 326         # a[i:i+k] same as b[j:j+k]
 327         # a[i+k:ahi] vs b[j+k:bhi] unknown
 328         if k:
 329             if alo < i and blo < j:
 330                 self.__helper(alo, i, blo, j, answer)
 331             answer.append(x)
 332             if i+k < ahi and j+k < bhi:
 333                 self.__helper(i+k, ahi, j+k, bhi, answer)
 334
 335     def ratio(self):
 336         """Return a measure of the sequences' similarity (float in [0,1]).
 337
 338         Where T is the total number of elements in both sequences, and
 339         M is the number of matches, this is 2*M / T.
 340         Note that this is 1 if the sequences are identical, and 0 if
 341         they have nothing in common.
 342         """
 343
 344         matches = reduce(lambda sum, triple: sum + triple[-1],
 345                          self.get_matching_blocks(), 0)
 346         return 2.0 * matches / (len(self.a) + len(self.b))
 347
 348     def quick_ratio(self):
 349         """Return an upper bound on ratio() relatively quickly."""
 350         # viewing a and b as multisets, set matches to the cardinality
 351         # of their intersection; this counts the number of matches
 352         # without regard to order, so is clearly an upper bound
 353         if self.fullbcount is None:
 354             self.fullbcount = fullbcount = {}
 355             for elt in self.b:
 356                 fullbcount[elt] = fullbcount.get(elt, 0) + 1
 357         fullbcount = self.fullbcount
 358         # avail[x] is the number of times x appears in 'b' less the
 359         # number of times we've seen it in 'a' so far ... kinda
 360         avail = {}
 361         availhas, matches = avail.has_key, 0
 362         for elt in self.a:
 363             if availhas(elt):
 364                 numb = avail[elt]
 365             else:
 366                 numb = fullbcount.get(elt, 0)
 367             avail[elt] = numb - 1
 368             if numb > 0:
 369                 matches = matches + 1
 370         return 2.0 * matches / (len(self.a) + len(self.b))
 371
 372     def real_quick_ratio(self):
 373         """Return an upper bound on ratio() very quickly"""
 374         la, lb = len(self.a), len(self.b)
 375         # can't have more matches than the number of elements in the
 376         # shorter sequence
 377         return 2.0 * min(la, lb) / (la + lb)
 378
 379     def get_opcodes(self):
 380         if self.opcodes is not None:
 381             return self.opcodes
 382         i = j = 0
 383         self.opcodes = answer = []
 384         for ai, bj, size in self.get_matching_blocks():
 385             # invariant:  we've pumped out correct diffs to change
 386             # a[:i] into b[:j], and the next matching block is
 387             # a[ai:ai+size] == b[bj:bj+size].  So we need to pump
 388             # out a diff to change a[i:ai] into b[j:bj], pump out
 389             # the matching block, and move (i,j) beyond the match
 390             tag = ''
 391             if i < ai and j < bj:
 392                 tag = 'replace'
 393             elif i < ai:
 394                 tag = 'delete'
 395             elif j < bj:
 396                 tag = 'insert'
 397             if tag:
 398                 answer.append( (tag, i, ai, j, bj) )
 399             i, j = ai+size, bj+size
 400             # the list of matching blocks is terminated by a
 401             # sentinel with size 0
 402             if size:
 403                 answer.append( ('equal', ai, i, bj, j) )
 404         return answer
 405
 406 # meant for dumping lines
 407 def dump(tag, x, lo, hi):
 408     for i in xrange(lo, hi):
 409         print tag, x[i],
 410
 411 # figure out which mark to stick under characters in lines that
 412 # have changed (blank = same, - = deleted, + = inserted, ^ = replaced)
 413 _combine = { '  ': ' ',
 414              '. ': '-',
 415              ' .': '+',
 416              '..': '^' }
 417
 418 def plain_replace(a, alo, ahi, b, blo, bhi):
 419     assert alo < ahi and blo < bhi
 420     # dump the shorter block first -- reduces the burden on short-term
 421     # memory if the blocks are of very different sizes
 422     if bhi - blo < ahi - alo:
 423         dump('+', b, blo, bhi)
 424         dump('-', a, alo, ahi)
 425     else:
 426         dump('-', a, alo, ahi)
 427         dump('+', b, blo, bhi)
 428
 429 # When replacing one block of lines with another, this guy searches
 430 # the blocks for *similar* lines; the best-matching pair (if any) is
 431 # used as a synch point, and intraline difference marking is done on
 432 # the similar pair.  Lots of work, but often worth it.
 433
 434 def fancy_replace(a, alo, ahi, b, blo, bhi):
 435     if TRACE:
 436         print '*** fancy_replace', alo, ahi, blo, bhi
 437         dump('>', a, alo, ahi)
 438         dump('<', b, blo, bhi)
 439
 440     # don't synch up unless the lines have a similarity score of at
 441     # least cutoff; best_ratio tracks the best score seen so far
 442     best_ratio, cutoff = 0.74, 0.75
 443     cruncher = SequenceMatcher(IS_CHARACTER_JUNK)
 444     eqi, eqj = None, None   # 1st indices of equal lines (if any)
 445
 446     # search for the pair that matches best without being identical
 447     # (identical lines must be junk lines, & we don't want to synch up
 448     # on junk -- unless we have to)
 449     for j in xrange(blo, bhi):
 450         bj = b[j]
 451         cruncher.set_seq2(bj)
 452         for i in xrange(alo, ahi):
 453             ai = a[i]
 454             if ai == bj:
 455                 if eqi is None:
 456                     eqi, eqj = i, j
 457                 continue
 458             cruncher.set_seq1(ai)
 459             # computing similarity is expensive, so use the quick
 460             # upper bounds first -- have seen this speed up messy
 461             # compares by a factor of 3.
 462             # note that ratio() is only expensive to compute the first
 463             # time it's called on a sequence pair; the expensive part
 464             # of the computation is cached by cruncher
 465             if cruncher.real_quick_ratio() > best_ratio and \
 466                   cruncher.quick_ratio() > best_ratio and \
 467                   cruncher.ratio() > best_ratio:
 468                 best_ratio, best_i, best_j = cruncher.ratio(), i, j
 469     if best_ratio < cutoff:
 470         # no non-identical "pretty close" pair
 471         if eqi is None:
 472             # no identical pair either -- treat it as a straight replace
 473             plain_replace(a, alo, ahi, b, blo, bhi)
 474             return
 475         # no close pair, but an identical pair -- synch up on that
 476         best_i, best_j, best_ratio = eqi, eqj, 1.0
 477     else:
 478         # there's a close pair, so forget the identical pair (if any)
 479         eqi = None
 480
 481     # a[best_i] very similar to b[best_j]; eqi is None iff they're not
 482     # identical
 483     if TRACE:
 484         print '*** best_ratio', best_ratio, best_i, best_j
 485         dump('>', a, best_i, best_i+1)
 486         dump('<', b, best_j, best_j+1)
 487
 488     # pump out diffs from before the synch point
 489     fancy_helper(a, alo, best_i, b, blo, best_j)
 490
 491     # do intraline marking on the synch pair
 492     aelt, belt = a[best_i], b[best_j]
 493     if eqi is None:
 494         # pump out a '-', '+', '?' triple for the synched lines;
 495         atags = btags = ""
 496         cruncher.set_seqs(aelt, belt)
 497         for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
 498             la, lb = ai2 - ai1, bj2 - bj1
 499             if tag == 'replace':
 500                 atags = atags + '.' * la
 501                 btags = btags + '.' * lb
 502             elif tag == 'delete':
 503                 atags = atags + '.' * la
 504             elif tag == 'insert':
 505                 btags = btags + '.' * lb
 506             elif tag == 'equal':
 507                 atags = atags + ' ' * la
 508                 btags = btags + ' ' * lb
 509             else:
 510                 raise ValueError, 'unknown tag ' + `tag`
 511         la, lb = len(atags), len(btags)
 512         if la < lb:
 513             atags = atags + ' ' * (lb - la)
 514         elif lb < la:
 515             btags = btags + ' ' * (la - lb)
 516         combined = map(lambda x,y: _combine[x+y], atags, btags)
 517         print '-', aelt, '+', belt, '?', \
 518               string.rstrip(string.join(combined, ''))
 519     else:
 520         # the synch pair is identical
 521         print ' ', aelt,
 522
 523     # pump out diffs from after the synch point
 524     fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
 525
 526 def fancy_helper(a, alo, ahi, b, blo, bhi):
 527     if alo < ahi:
 528         if blo < bhi:
 529             fancy_replace(a, alo, ahi, b, blo, bhi)
 530         else:
 531             dump('-', a, alo, ahi)
 532     elif blo < bhi:
 533         dump('+', b, blo, bhi)
 534
 535 def fail(msg):
 536     import sys
 537     out = sys.stderr.write
 538     out(msg + "\n\n")
 539     out(__doc__)
 540     return 0
 541
 542 # open a file & return the file object; gripe and return 0 if it
 543 # couldn't be opened
 544 def fopen(fname):
 545     try:
 546         return open(fname, 'r')
 547     except IOError, detail:
 548         return fail("couldn't open " + fname + ": " + str(detail))
 549
 550 # open two files & spray the diff to stdout; return false iff a problem
 551 def fcompare(f1name, f2name):
 552     f1 = fopen(f1name)
 553     f2 = fopen(f2name)
 554     if not f1 or not f2:
 555         return 0
 556
 557     a = f1.readlines(); f1.close()
 558     b = f2.readlines(); f2.close()
 559
 560     cruncher = SequenceMatcher(IS_LINE_JUNK, a, b)
 561     for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
 562         if tag == 'replace':
 563             fancy_replace(a, alo, ahi, b, blo, bhi)
 564         elif tag == 'delete':
 565             dump('-', a, alo, ahi)
 566         elif tag == 'insert':
 567             dump('+', b, blo, bhi)
 568         elif tag == 'equal':
 569             dump(' ', a, alo, ahi)
 570         else:
 571             raise ValueError, 'unknown tag ' + `tag`
 572
 573     return 1
 574
 575 # crack args (sys.argv[1:] is normal) & compare;
 576 # return false iff a problem
 577
 578 def main(args):
 579     import getopt
 580     try:
 581         opts, args = getopt.getopt(args, "qr:")
 582     except getopt.error, detail:
 583         return fail(str(detail))
 584     noisy = 1
 585     qseen = rseen = 0
 586     for opt, val in opts:
 587         if opt == "-q":
 588             qseen = 1
 589             noisy = 0
 590         elif opt == "-r":
 591             rseen = 1
 592             whichfile = val
 593     if qseen and rseen:
 594         return fail("can't specify both -q and -r")
 595     if rseen:
 596         if args:
 597             return fail("no args allowed with -r option")
 598         if whichfile in "12":
 599             restore(whichfile)
 600             return 1
 601         return fail("-r value must be 1 or 2")
 602     if len(args) != 2:
 603         return fail("need 2 filename args")
 604     f1name, f2name = args
 605     if noisy:
 606         print '-:', f1name
 607         print '+:', f2name
 608     return fcompare(f1name, f2name)
 609
 610 def restore(which):
 611     import sys
 612     tag = {"1": "- ", "2": "+ "}[which]
 613     prefixes = ("  ", tag)
 614     for line in sys.stdin.readlines():
 615         if line[:2] in prefixes:
 616             print line[2:],
 617
 618 if __name__ == '__main__':
 619     import sys
 620     args = sys.argv[1:]
 621     if "-profile" in args:
 622         import profile, pstats
 623         args.remove("-profile")
 624         statf = "ndiff.pro"
 625         profile.run("main(args)", statf)
 626         stats = pstats.Stats(statf)
 627         stats.strip_dirs().sort_stats('time').print_stats()
 628     else:
 629         main(args)