This commit was manufactured by cvs2svn to create tag 'r241c1'.
[python/dscho.git] / Doc / lib / libdifflib.tex
blob765accc84bd761678200d083265b264f83ff246d
1 \section{\module{difflib} ---
2 Helpers for computing deltas}
4 \declaremodule{standard}{difflib}
5 \modulesynopsis{Helpers for computing differences between objects.}
6 \moduleauthor{Tim Peters}{tim_one@users.sourceforge.net}
7 \sectionauthor{Tim Peters}{tim_one@users.sourceforge.net}
8 % LaTeXification by Fred L. Drake, Jr. <fdrake@acm.org>.
10 \versionadded{2.1}
13 \begin{classdesc*}{SequenceMatcher}
14 This is a flexible class for comparing pairs of sequences of any
15 type, so long as the sequence elements are hashable. The basic
16 algorithm predates, and is a little fancier than, an algorithm
17 published in the late 1980's by Ratcliff and Obershelp under the
18 hyperbolic name ``gestalt pattern matching.'' The idea is to find
19 the longest contiguous matching subsequence that contains no
20 ``junk'' elements (the Ratcliff and Obershelp algorithm doesn't
21 address junk). The same idea is then applied recursively to the
22 pieces of the sequences to the left and to the right of the matching
23 subsequence. This does not yield minimal edit sequences, but does
24 tend to yield matches that ``look right'' to people.
26 \strong{Timing:} The basic Ratcliff-Obershelp algorithm is cubic
27 time in the worst case and quadratic time in the expected case.
28 \class{SequenceMatcher} is quadratic time for the worst case and has
29 expected-case behavior dependent in a complicated way on how many
30 elements the sequences have in common; best case time is linear.
31 \end{classdesc*}
33 \begin{classdesc*}{Differ}
34 This is a class for comparing sequences of lines of text, and
35 producing human-readable differences or deltas. \class{Differ} uses
36 \class{SequenceMatcher} both to compare sequences of lines, and to
37 compare sequences of characters within similar (near-matching)
38 lines.
40 Each line of a \class{Differ} delta begins with a two-letter code:
42 \begin{tableii}{l|l}{code}{Code}{Meaning}
43 \lineii{'- '}{line unique to sequence 1}
44 \lineii{'+ '}{line unique to sequence 2}
45 \lineii{' '}{line common to both sequences}
46 \lineii{'? '}{line not present in either input sequence}
47 \end{tableii}
49 Lines beginning with `\code{?~}' attempt to guide the eye to
50 intraline differences, and were not present in either input
51 sequence. These lines can be confusing if the sequences contain tab
52 characters.
53 \end{classdesc*}
55 \begin{classdesc*}{HtmlDiff}
57 This class can be used to create an HTML table (or a complete HTML file
58 containing the table) showing a side by side, line by line comparison
59 of text with inter-line and intra-line change highlights. The table can
60 be generated in either full or contextual difference mode.
62 The constructor for this class is:
64 \begin{funcdesc}{__init__}{\optional{tabsize}\optional{,
65 wrapcolumn}\optional{, linejunk}\optional{, charjunk}}
67 Initializes an instance of \class{HtmlDiff}.
69 \var{tabsize} is an optional keyword argument to specify tab stop spacing
70 and defaults to \code{8}.
72 \var{wrapcolumn} is an optional keyword argument to specify the column
73 number at which lines are broken and wrapped; it defaults to \code{None},
74 in which case lines are not wrapped.
76 \var{linejunk} and \var{charjunk} are optional keyword arguments passed
77 into \code{ndiff()} (used by \class{HtmlDiff} to generate the
78 side by side HTML differences). See \code{ndiff()} documentation for
79 argument default values and descriptions.
81 \end{funcdesc}
83 The following methods are public:
85 \begin{funcdesc}{make_file}{fromlines, tolines
86 \optional{, fromdesc}\optional{, todesc}\optional{, context}\optional{,
87 numlines}}
88 Compares \var{fromlines} and \var{tolines} (lists of strings) and returns
89 a string which is a complete HTML file containing a table showing line by
90 line differences with inter-line and intra-line changes highlighted.
92 \var{fromdesc} and \var{todesc} are optional keyword arguments to specify
93 from/to file column header strings (both default to an empty string).
95 \var{context} and \var{numlines} are both optional keyword arguments.
96 Set \var{context} to \code{True} when contextual differences are to be
97 shown, else the default is \code{False} to show the full files.
98 \var{numlines} defaults to \code{5}. When \var{context} is \code{True},
99 \var{numlines} controls the number of context lines which surround the
100 difference highlights. When \var{context} is \code{False}, \var{numlines}
101 controls the number of lines which are shown before a difference
102 highlight when using the ``next'' hyperlinks (setting to zero would cause
103 the ``next'' hyperlinks to place the next difference highlight at the top of
104 the browser without any leading context).
105 \end{funcdesc}
107 \begin{funcdesc}{make_table}{fromlines, tolines
108 \optional{, fromdesc}\optional{, todesc}\optional{, context}\optional{,
109 numlines}}
110 Compares \var{fromlines} and \var{tolines} (lists of strings) and returns
111 a string which is a complete HTML table showing line by line differences
112 with inter-line and intra-line changes highlighted.
114 The arguments for this method are the same as those for the
115 \method{make_file()} method.
116 \end{funcdesc}
118 \file{Tools/scripts/diff.py} is a command-line front-end to this class
119 and contains a good example of its use.
121 \versionadded{2.4}
122 \end{classdesc*}
124 \begin{funcdesc}{context_diff}{a, b\optional{, fromfile}\optional{,
125 tofile}\optional{, fromfiledate}\optional{, tofiledate}\optional{,
126 n}\optional{, lineterm}}
127 Compare \var{a} and \var{b} (lists of strings); return a
128 delta (a generator generating the delta lines) in context diff
129 format.
131 Context diffs are a compact way of showing just the lines that have
132 changed plus a few lines of context. The changes are shown in a
133 before/after style. The number of context lines is set by \var{n}
134 which defaults to three.
136 By default, the diff control lines (those with \code{***} or \code{---})
137 are created with a trailing newline. This is helpful so that inputs created
138 from \function{file.readlines()} result in diffs that are suitable for use
139 with \function{file.writelines()} since both the inputs and outputs have
140 trailing newlines.
142 For inputs that do not have trailing newlines, set the \var{lineterm}
143 argument to \code{""} so that the output will be uniformly newline free.
145 The context diff format normally has a header for filenames and
146 modification times. Any or all of these may be specified using strings for
147 \var{fromfile}, \var{tofile}, \var{fromfiledate}, and \var{tofiledate}.
148 The modification times are normally expressed in the format returned by
149 \function{time.ctime()}. If not specified, the strings default to blanks.
151 \file{Tools/scripts/diff.py} is a command-line front-end for this
152 function.
154 \versionadded{2.3}
155 \end{funcdesc}
157 \begin{funcdesc}{get_close_matches}{word, possibilities\optional{,
158 n}\optional{, cutoff}}
159 Return a list of the best ``good enough'' matches. \var{word} is a
160 sequence for which close matches are desired (typically a string),
161 and \var{possibilities} is a list of sequences against which to
162 match \var{word} (typically a list of strings).
164 Optional argument \var{n} (default \code{3}) is the maximum number
165 of close matches to return; \var{n} must be greater than \code{0}.
167 Optional argument \var{cutoff} (default \code{0.6}) is a float in
168 the range [0, 1]. Possibilities that don't score at least that
169 similar to \var{word} are ignored.
171 The best (no more than \var{n}) matches among the possibilities are
172 returned in a list, sorted by similarity score, most similar first.
174 \begin{verbatim}
175 >>> get_close_matches('appel', ['ape', 'apple', 'peach', 'puppy'])
176 ['apple', 'ape']
177 >>> import keyword
178 >>> get_close_matches('wheel', keyword.kwlist)
179 ['while']
180 >>> get_close_matches('apple', keyword.kwlist)
181 []
182 >>> get_close_matches('accept', keyword.kwlist)
183 ['except']
184 \end{verbatim}
185 \end{funcdesc}
187 \begin{funcdesc}{ndiff}{a, b\optional{, linejunk}\optional{, charjunk}}
188 Compare \var{a} and \var{b} (lists of strings); return a
189 \class{Differ}-style delta (a generator generating the delta lines).
191 Optional keyword parameters \var{linejunk} and \var{charjunk} are
192 for filter functions (or \code{None}):
194 \var{linejunk}: A function that accepts a single string
195 argument, and returns true if the string is junk, or false if not.
196 The default is (\code{None}), starting with Python 2.3. Before then,
197 the default was the module-level function
198 \function{IS_LINE_JUNK()}, which filters out lines without visible
199 characters, except for at most one pound character (\character{\#}).
200 As of Python 2.3, the underlying \class{SequenceMatcher} class
201 does a dynamic analysis of which lines are so frequent as to
202 constitute noise, and this usually works better than the pre-2.3
203 default.
205 \var{charjunk}: A function that accepts a character (a string of
206 length 1), and returns true if the character is junk, or false if not.
207 The default is module-level function \function{IS_CHARACTER_JUNK()},
208 which filters out whitespace characters (a blank or tab; note: bad
209 idea to include newline in this!).
211 \file{Tools/scripts/ndiff.py} is a command-line front-end to this
212 function.
214 \begin{verbatim}
215 >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
216 ... 'ore\ntree\nemu\n'.splitlines(1))
217 >>> print ''.join(diff),
218 - one
219 ?  ^
220 + ore
221 ?  ^
222 - two
223 - three
224 ?  -
225 + tree
226 + emu
227 \end{verbatim}
228 \end{funcdesc}
230 \begin{funcdesc}{restore}{sequence, which}
231 Return one of the two sequences that generated a delta.
233 Given a \var{sequence} produced by \method{Differ.compare()} or
234 \function{ndiff()}, extract lines originating from file 1 or 2
235 (parameter \var{which}), stripping off line prefixes.
237 Example:
239 \begin{verbatim}
240 >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
241 ... 'ore\ntree\nemu\n'.splitlines(1))
242 >>> diff = list(diff) # materialize the generated delta into a list
243 >>> print ''.join(restore(diff, 1)),
244 one
245 two
246 three
247 >>> print ''.join(restore(diff, 2)),
248 ore
249 tree
250 emu
251 \end{verbatim}
253 \end{funcdesc}
255 \begin{funcdesc}{unified_diff}{a, b\optional{, fromfile}\optional{,
256 tofile}\optional{, fromfiledate}\optional{, tofiledate}\optional{,
257 n}\optional{, lineterm}}
258 Compare \var{a} and \var{b} (lists of strings); return a
259 delta (a generator generating the delta lines) in unified diff
260 format.
262 Unified diffs are a compact way of showing just the lines that have
263 changed plus a few lines of context. The changes are shown in an
264 inline style (instead of separate before/after blocks). The number
265 of context lines is set by \var{n} which defaults to three.
267 By default, the diff control lines (those with \code{---}, \code{+++},
268 or \code{@@}) are created with a trailing newline. This is helpful so
269 that inputs created from \function{file.readlines()} result in diffs
270 that are suitable for use with \function{file.writelines()} since both
271 the inputs and outputs have trailing newlines.
273 For inputs that do not have trailing newlines, set the \var{lineterm}
274 argument to \code{""} so that the output will be uniformly newline free.
276 The unified diff format normally has a header for filenames and
277 modification times. Any or all of these may be specified using strings for
278 \var{fromfile}, \var{tofile}, \var{fromfiledate}, and \var{tofiledate}.
279 The modification times are normally expressed in the format returned by
280 \function{time.ctime()}. If not specified, the strings default to blanks.
282 \file{Tools/scripts/diff.py} is a command-line front-end for this
283 function.
285 \versionadded{2.3}
286 \end{funcdesc}
288 \begin{funcdesc}{IS_LINE_JUNK}{line}
289 Return true for ignorable lines. The line \var{line} is ignorable
290 if \var{line} is blank or contains a single \character{\#},
291 otherwise it is not ignorable. Used as a default for parameter
292 \var{linejunk} in \function{ndiff()} before Python 2.3.
293 \end{funcdesc}
296 \begin{funcdesc}{IS_CHARACTER_JUNK}{ch}
297 Return true for ignorable characters. The character \var{ch} is
298 ignorable if \var{ch} is a space or tab, otherwise it is not
299 ignorable. Used as a default for parameter \var{charjunk} in
300 \function{ndiff()}.
301 \end{funcdesc}
304 \begin{seealso}
305 \seetitle[http://www.ddj.com/documents/s=1103/ddj8807c/]
306 {Pattern Matching: The Gestalt Approach}{Discussion of a
307 similar algorithm by John W. Ratcliff and D. E. Metzener.
308 This was published in
309 \citetitle[http://www.ddj.com/]{Dr. Dobb's Journal} in
310 July, 1988.}
311 \end{seealso}
314 \subsection{SequenceMatcher Objects \label{sequence-matcher}}
316 The \class{SequenceMatcher} class has this constructor:
318 \begin{classdesc}{SequenceMatcher}{\optional{isjunk\optional{,
319 a\optional{, b}}}}
320 Optional argument \var{isjunk} must be \code{None} (the default) or
321 a one-argument function that takes a sequence element and returns
322 true if and only if the element is ``junk'' and should be ignored.
323 Passing \code{None} for \var{isjunk} is equivalent to passing
324 \code{lambda x: 0}; in other words, no elements are ignored. For
325 example, pass:
327 \begin{verbatim}
328 lambda x: x in " \t"
329 \end{verbatim}
331 if you're comparing lines as sequences of characters, and don't want
332 to synch up on blanks or hard tabs.
334 The optional arguments \var{a} and \var{b} are sequences to be
335 compared; both default to empty strings. The elements of both
336 sequences must be hashable.
337 \end{classdesc}
340 \class{SequenceMatcher} objects have the following methods:
342 \begin{methoddesc}{set_seqs}{a, b}
343 Set the two sequences to be compared.
344 \end{methoddesc}
346 \class{SequenceMatcher} computes and caches detailed information about
347 the second sequence, so if you want to compare one sequence against
348 many sequences, use \method{set_seq2()} to set the commonly used
349 sequence once and call \method{set_seq1()} repeatedly, once for each
350 of the other sequences.
352 \begin{methoddesc}{set_seq1}{a}
353 Set the first sequence to be compared. The second sequence to be
354 compared is not changed.
355 \end{methoddesc}
357 \begin{methoddesc}{set_seq2}{b}
358 Set the second sequence to be compared. The first sequence to be
359 compared is not changed.
360 \end{methoddesc}
362 \begin{methoddesc}{find_longest_match}{alo, ahi, blo, bhi}
363 Find longest matching block in \code{\var{a}[\var{alo}:\var{ahi}]}
364 and \code{\var{b}[\var{blo}:\var{bhi}]}.
366 If \var{isjunk} was omitted or \code{None},
367 \method{find_longest_match()} returns \code{(\var{i}, \var{j},
368 \var{k})} such that \code{\var{a}[\var{i}:\var{i}+\var{k}]} is equal
369 to \code{\var{b}[\var{j}:\var{j}+\var{k}]}, where
370 \code{\var{alo} <= \var{i} <= \var{i}+\var{k} <= \var{ahi}} and
371 \code{\var{blo} <= \var{j} <= \var{j}+\var{k} <= \var{bhi}}.
372 For all \code{(\var{i'}, \var{j'}, \var{k'})} meeting those
373 conditions, the additional conditions
374 \code{\var{k} >= \var{k'}},
375 \code{\var{i} <= \var{i'}},
376 and if \code{\var{i} == \var{i'}}, \code{\var{j} <= \var{j'}}
377 are also met.
378 In other words, of all maximal matching blocks, return one that
379 starts earliest in \var{a}, and of all those maximal matching blocks
380 that start earliest in \var{a}, return the one that starts earliest
381 in \var{b}.
383 \begin{verbatim}
384 >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
385 >>> s.find_longest_match(0, 5, 0, 9)
386 (0, 4, 5)
387 \end{verbatim}
389 If \var{isjunk} was provided, first the longest matching block is
390 determined as above, but with the additional restriction that no
391 junk element appears in the block. Then that block is extended as
392 far as possible by matching (only) junk elements on both sides.
393 So the resulting block never matches on junk except as identical
394 junk happens to be adjacent to an interesting match.
396 Here's the same example as before, but considering blanks to be junk.
397 That prevents \code{' abcd'} from matching the \code{' abcd'} at the
398 tail end of the second sequence directly. Instead only the
399 \code{'abcd'} can match, and matches the leftmost \code{'abcd'} in
400 the second sequence:
402 \begin{verbatim}
403 >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
404 >>> s.find_longest_match(0, 5, 0, 9)
405 (1, 0, 4)
406 \end{verbatim}
408 If no blocks match, this returns \code{(\var{alo}, \var{blo}, 0)}.
409 \end{methoddesc}
411 \begin{methoddesc}{get_matching_blocks}{}
412 Return list of triples describing matching subsequences.
413 Each triple is of the form \code{(\var{i}, \var{j}, \var{n})}, and
414 means that \code{\var{a}[\var{i}:\var{i}+\var{n}] ==
415 \var{b}[\var{j}:\var{j}+\var{n}]}. The triples are monotonically
416 increasing in \var{i} and \var{j}.
418 The last triple is a dummy, and has the value \code{(len(\var{a}),
419 len(\var{b}), 0)}. It is the only triple with \code{\var{n} == 0}.
420 % Explain why a dummy is used!
422 \begin{verbatim}
423 >>> s = SequenceMatcher(None, "abxcd", "abcd")
424 >>> s.get_matching_blocks()
425 [(0, 0, 2), (3, 2, 2), (5, 4, 0)]
426 \end{verbatim}
427 \end{methoddesc}
429 \begin{methoddesc}{get_opcodes}{}
430 Return list of 5-tuples describing how to turn \var{a} into \var{b}.
431 Each tuple is of the form \code{(\var{tag}, \var{i1}, \var{i2},
432 \var{j1}, \var{j2})}. The first tuple has \code{\var{i1} ==
433 \var{j1} == 0}, and remaining tuples have \var{i1} equal to the
434 \var{i2} from the preceding tuple, and, likewise, \var{j1} equal to
435 the previous \var{j2}.
437 The \var{tag} values are strings, with these meanings:
439 \begin{tableii}{l|l}{code}{Value}{Meaning}
440 \lineii{'replace'}{\code{\var{a}[\var{i1}:\var{i2}]} should be
441 replaced by \code{\var{b}[\var{j1}:\var{j2}]}.}
442 \lineii{'delete'}{\code{\var{a}[\var{i1}:\var{i2}]} should be
443 deleted. Note that \code{\var{j1} == \var{j2}} in
444 this case.}
445 \lineii{'insert'}{\code{\var{b}[\var{j1}:\var{j2}]} should be
446 inserted at \code{\var{a}[\var{i1}:\var{i1}]}.
447 Note that \code{\var{i1} == \var{i2}} in this
448 case.}
449 \lineii{'equal'}{\code{\var{a}[\var{i1}:\var{i2}] ==
450 \var{b}[\var{j1}:\var{j2}]} (the sub-sequences are
451 equal).}
452 \end{tableii}
454 For example:
456 \begin{verbatim}
457 >>> a = "qabxcd"
458 >>> b = "abycdf"
459 >>> s = SequenceMatcher(None, a, b)
460 >>> for tag, i1, i2, j1, j2 in s.get_opcodes():
461 ... print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
462 ... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))
463 delete a[0:1] (q) b[0:0] ()
464 equal a[1:3] (ab) b[0:2] (ab)
465 replace a[3:4] (x) b[2:3] (y)
466 equal a[4:6] (cd) b[3:5] (cd)
467 insert a[6:6] () b[5:6] (f)
468 \end{verbatim}
469 \end{methoddesc}
471 \begin{methoddesc}{get_grouped_opcodes}{\optional{n}}
472 Return a generator of groups with up to \var{n} lines of context.
474 Starting with the groups returned by \method{get_opcodes()},
475 this method splits out smaller change clusters and eliminates
476 intervening ranges which have no changes.
478 The groups are returned in the same format as \method{get_opcodes()}.
479 \versionadded{2.3}
480 \end{methoddesc}
482 \begin{methoddesc}{ratio}{}
483 Return a measure of the sequences' similarity as a float in the
484 range [0, 1].
486 Where T is the total number of elements in both sequences, and M is
487 the number of matches, this is 2.0*M / T. Note that this is
488 \code{1.0} if the sequences are identical, and \code{0.0} if they
489 have nothing in common.
491 This is expensive to compute if \method{get_matching_blocks()} or
492 \method{get_opcodes()} hasn't already been called, in which case you
493 may want to try \method{quick_ratio()} or
494 \method{real_quick_ratio()} first to get an upper bound.
495 \end{methoddesc}
497 \begin{methoddesc}{quick_ratio}{}
498 Return an upper bound on \method{ratio()} relatively quickly.
500 This isn't defined beyond that it is an upper bound on
501 \method{ratio()}, and is faster to compute.
502 \end{methoddesc}
504 \begin{methoddesc}{real_quick_ratio}{}
505 Return an upper bound on \method{ratio()} very quickly.
507 This isn't defined beyond that it is an upper bound on
508 \method{ratio()}, and is faster to compute than either
509 \method{ratio()} or \method{quick_ratio()}.
510 \end{methoddesc}
512 The three methods that return the ratio of matching to total characters
513 can give different results due to differing levels of approximation,
514 although \method{quick_ratio()} and \method{real_quick_ratio()} are always
515 at least as large as \method{ratio()}:
517 \begin{verbatim}
518 >>> s = SequenceMatcher(None, "abcd", "bcde")
519 >>> s.ratio()
520 0.75
521 >>> s.quick_ratio()
522 0.75
523 >>> s.real_quick_ratio()
524 1.0
525 \end{verbatim}
528 \subsection{SequenceMatcher Examples \label{sequencematcher-examples}}
531 This example compares two strings, considering blanks to be ``junk:''
533 \begin{verbatim}
534 >>> s = SequenceMatcher(lambda x: x == " ",
535 ... "private Thread currentThread;",
536 ... "private volatile Thread currentThread;")
537 \end{verbatim}
539 \method{ratio()} returns a float in [0, 1], measuring the similarity
540 of the sequences. As a rule of thumb, a \method{ratio()} value over
541 0.6 means the sequences are close matches:
543 \begin{verbatim}
544 >>> print round(s.ratio(), 3)
545 0.866
546 \end{verbatim}
548 If you're only interested in where the sequences match,
549 \method{get_matching_blocks()} is handy:
551 \begin{verbatim}
552 >>> for block in s.get_matching_blocks():
553 ... print "a[%d] and b[%d] match for %d elements" % block
554 a[0] and b[0] match for 8 elements
555 a[8] and b[17] match for 6 elements
556 a[14] and b[23] match for 15 elements
557 a[29] and b[38] match for 0 elements
558 \end{verbatim}
560 Note that the last tuple returned by \method{get_matching_blocks()} is
561 always a dummy, \code{(len(\var{a}), len(\var{b}), 0)}, and this is
562 the only case in which the last tuple element (number of elements
563 matched) is \code{0}.
565 If you want to know how to change the first sequence into the second,
566 use \method{get_opcodes()}:
568 \begin{verbatim}
569 >>> for opcode in s.get_opcodes():
570 ... print "%6s a[%d:%d] b[%d:%d]" % opcode
571 equal a[0:8] b[0:8]
572 insert a[8:8] b[8:17]
573 equal a[8:14] b[17:23]
574 equal a[14:29] b[23:38]
575 \end{verbatim}
577 See also the function \function{get_close_matches()} in this module,
578 which shows how simple code building on \class{SequenceMatcher} can be
579 used to do useful work.
582 \subsection{Differ Objects \label{differ-objects}}
584 Note that \class{Differ}-generated deltas make no claim to be
585 \strong{minimal} diffs. To the contrary, minimal diffs are often
586 counter-intuitive, because they synch up anywhere possible, sometimes
587 accidental matches 100 pages apart. Restricting synch points to
588 contiguous matches preserves some notion of locality, at the
589 occasional cost of producing a longer diff.
591 The \class{Differ} class has this constructor:
593 \begin{classdesc}{Differ}{\optional{linejunk\optional{, charjunk}}}
594 Optional keyword parameters \var{linejunk} and \var{charjunk} are
595 for filter functions (or \code{None}):
597 \var{linejunk}: A function that accepts a single string
598 argument, and returns true if the string is junk. The default is
599 \code{None}, meaning that no line is considered junk.
601 \var{charjunk}: A function that accepts a single character argument
602 (a string of length 1), and returns true if the character is junk.
603 The default is \code{None}, meaning that no character is
604 considered junk.
605 \end{classdesc}
607 \class{Differ} objects are used (deltas generated) via a single
608 method:
610 \begin{methoddesc}{compare}{a, b}
611 Compare two sequences of lines, and generate the delta (a sequence
612 of lines).
614 Each sequence must contain individual single-line strings ending
615 with newlines. Such sequences can be obtained from the
616 \method{readlines()} method of file-like objects. The delta generated
617 also consists of newline-terminated strings, ready to be printed as-is
618 via the \method{writelines()} method of a file-like object.
619 \end{methoddesc}
622 \subsection{Differ Example \label{differ-examples}}
624 This example compares two texts. First we set up the texts, sequences
625 of individual single-line strings ending with newlines (such sequences
626 can also be obtained from the \method{readlines()} method of file-like
627 objects):
629 \begin{verbatim}
630 >>> text1 = ''' 1. Beautiful is better than ugly.
631 ... 2. Explicit is better than implicit.
632 ... 3. Simple is better than complex.
633 ... 4. Complex is better than complicated.
634 ... '''.splitlines(1)
635 >>> len(text1)
636 4
637 >>> text1[0][-1]
638 '\n'
639 >>> text2 = ''' 1. Beautiful is better than ugly.
640 ... 3. Simple is better than complex.
641 ... 4. Complicated is better than complex.
642 ... 5. Flat is better than nested.
643 ... '''.splitlines(1)
644 \end{verbatim}
646 Next we instantiate a \class{Differ} object:
648 \begin{verbatim}
649 >>> d = Differ()
650 \end{verbatim}
652 Note that when instantiating a \class{Differ} object we may pass
653 functions to filter out line and character ``junk.'' See the
654 \class{Differ} constructor for details.
656 Finally, we compare the two:
658 \begin{verbatim}
659 >>> result = list(d.compare(text1, text2))
660 \end{verbatim}
662 \code{result} is a list of strings, so let's pretty-print it:
664 \begin{verbatim}
665 >>> from pprint import pprint
666 >>> pprint(result)
667 [' 1. Beautiful is better than ugly.\n',
668 '- 2. Explicit is better than implicit.\n',
669 '- 3. Simple is better than complex.\n',
670 '+ 3. Simple is better than complex.\n',
671 '? ++ \n',
672 '- 4. Complex is better than complicated.\n',
673 '? ^ ---- ^ \n',
674 '+ 4. Complicated is better than complex.\n',
675 '? ++++ ^ ^ \n',
676 '+ 5. Flat is better than nested.\n']
677 \end{verbatim}
679 As a single multi-line string it looks like this:
681 \begin{verbatim}
682 >>> import sys
683 >>> sys.stdout.writelines(result)
684 1. Beautiful is better than ugly.
685 - 2. Explicit is better than implicit.
686 - 3. Simple is better than complex.
687 + 3. Simple is better than complex.
688 ? ++
689 - 4. Complex is better than complicated.
690 ? ^ ---- ^
691 + 4. Complicated is better than complex.
692 ? ++++ ^ ^
693 + 5. Flat is better than nested.
694 \end{verbatim}