1 """Utilities for comparing files and directories.
7 cmp(f1, f2, shallow=1) -> int
8 cmpfiles(a, b, common) -> ([], [], [])
15 from itertools
import ifilter
, ifilterfalse
17 __all__
= ["cmp","dircmp","cmpfiles"]
22 def cmp(f1
, f2
, shallow
=1, use_statcache
=None):
29 f2 -- Second file name
31 shallow -- Just check stat signature (do not read the files).
34 use_statcache -- obsolete argument.
38 True if the files are the same, False otherwise.
40 This function uses a cache for past comparisons and the results,
41 with a cache invalidation mechanism relying on stale signatures.
44 if use_statcache
is not None:
45 warnings
.warn("use_statcache argument is deprecated",
48 s1
= _sig(os
.stat(f1
))
49 s2
= _sig(os
.stat(f2
))
50 if s1
[0] != stat
.S_IFREG
or s2
[0] != stat
.S_IFREG
:
52 if shallow
and s1
== s2
:
57 result
= _cache
.get((f1
, f2
))
58 if result
and (s1
, s2
) == result
[:2]:
60 outcome
= _do_cmp(f1
, f2
)
61 _cache
[f1
, f2
] = s1
, s2
, outcome
65 return (stat
.S_IFMT(st
.st_mode
),
74 b1
= fp1
.read(bufsize
)
75 b2
= fp2
.read(bufsize
)
81 # Directory comparison class.
84 """A class that manages the comparison of 2 directories.
86 dircmp(a,b,ignore=None,hide=None)
87 A and B are directories.
88 IGNORE is a list of names to ignore,
89 defaults to ['RCS', 'CVS', 'tags'].
90 HIDE is a list of names to hide,
91 defaults to [os.curdir, os.pardir].
94 x = dircmp(dir1, dir2)
95 x.report() -> prints a report on the differences between dir1 and dir2
97 x.report_partial_closure() -> prints report on differences between dir1
98 and dir2, and reports on common immediate subdirectories.
99 x.report_full_closure() -> like report_partial_closure,
103 left_list, right_list: The files in dir1 and dir2,
104 filtered by hide and ignore.
105 common: a list of names in both dir1 and dir2.
106 left_only, right_only: names only in dir1, dir2.
107 common_dirs: subdirectories in both dir1 and dir2.
108 common_files: files in both dir1 and dir2.
109 common_funny: names in both dir1 and dir2 where the type differs between
110 dir1 and dir2, or the name is not stat-able.
111 same_files: list of identical files.
112 diff_files: list of filenames which differ.
113 funny_files: list of files which could not be compared.
114 subdirs: a dictionary of dircmp objects, keyed by names in common_dirs.
117 def __init__(self
, a
, b
, ignore
=None, hide
=None): # Initialize
121 self
.hide
= [os
.curdir
, os
.pardir
] # Names never to be shown
125 self
.ignore
= ['RCS', 'CVS', 'tags'] # Names ignored in comparison
def phase0(self):
    """List both directories, minus hidden and ignored names.

    Sets ``self.left_list`` and ``self.right_list`` to the sorted
    directory listings of ``self.left`` and ``self.right`` after removing
    every name found in ``self.hide`` or ``self.ignore``.
    """
    # The same skip list applies to both sides, so build it once.
    skipped = self.hide + self.ignore
    self.left_list = _filter(os.listdir(self.left), skipped)
    self.right_list = _filter(os.listdir(self.right), skipped)
    for listing in (self.left_list, self.right_list):
        listing.sort()
def phase1(self):
    """Split the two listings into common, left-only and right-only names.

    Reads ``self.left_list`` and ``self.right_list`` and sets:

    common     -- names present in both listings
    left_only  -- names present only in the left listing
    right_only -- names present only in the right listing

    Each result keeps the order of the listing it was taken from, so
    sorted listings produce sorted results.  Plain membership tests are
    used instead of dict.has_key and itertools.ifilter, so the method
    does not depend on names that only exist in Python 2.
    """
    # Dictionaries give constant-time membership tests for each side.
    in_right = dict.fromkeys(self.right_list)
    in_left = dict.fromkeys(self.left_list)
    self.common = [name for name in self.left_list if name in in_right]
    self.left_only = [name for name in self.left_list
                      if name not in in_right]
    self.right_only = [name for name in self.right_list
                       if name not in in_left]
144 def phase2(self
): # Distinguish files, directories, funnies
145 self
.common_dirs
= []
146 self
.common_files
= []
147 self
.common_funny
= []
149 for x
in self
.common
:
150 a_path
= os
.path
.join(self
.left
, x
)
151 b_path
= os
.path
.join(self
.right
, x
)
155 a_stat
= os
.stat(a_path
)
156 except os
.error
, why
:
157 # print 'Can\'t stat', a_path, ':', why[1]
160 b_stat
= os
.stat(b_path
)
161 except os
.error
, why
:
162 # print 'Can\'t stat', b_path, ':', why[1]
166 a_type
= stat
.S_IFMT(a_stat
.st_mode
)
167 b_type
= stat
.S_IFMT(b_stat
.st_mode
)
169 self
.common_funny
.append(x
)
170 elif stat
.S_ISDIR(a_type
):
171 self
.common_dirs
.append(x
)
172 elif stat
.S_ISREG(a_type
):
173 self
.common_files
.append(x
)
175 self
.common_funny
.append(x
)
177 self
.common_funny
.append(x
)
def phase3(self):
    """Compare the files common to both directories.

    Passes ``self.left``, ``self.right`` and ``self.common_files`` to
    cmpfiles() and stores the three lists it returns as
    ``self.same_files``, ``self.diff_files`` and ``self.funny_files``.
    """
    same, different, troubled = cmpfiles(self.left, self.right,
                                         self.common_files)
    self.same_files = same
    self.diff_files = different
    self.funny_files = troubled
183 def phase4(self
): # Find out differences between common subdirectories
184 # A new dircmp object is created for each common subdirectory,
185 # these are stored in a dictionary indexed by filename.
186 # The hide and ignore properties are inherited from the parent
188 for x
in self
.common_dirs
:
189 a_x
= os
.path
.join(self
.left
, x
)
190 b_x
= os
.path
.join(self
.right
, x
)
191 self
.subdirs
[x
] = dircmp(a_x
, b_x
, self
.ignore
, self
.hide
)
193 def phase4_closure(self
): # Recursively call phase4() on subdirectories
195 for sd
in self
.subdirs
.itervalues():
198 def report(self
): # Print a report on the differences between a and b
199 # Output format is purposely lousy
200 print 'diff', self
.left
, self
.right
202 self
.left_only
.sort()
203 print 'Only in', self
.left
, ':', self
.left_only
205 self
.right_only
.sort()
206 print 'Only in', self
.right
, ':', self
.right_only
208 self
.same_files
.sort()
209 print 'Identical files :', self
.same_files
211 self
.diff_files
.sort()
212 print 'Differing files :', self
.diff_files
214 self
.funny_files
.sort()
215 print 'Trouble with common files :', self
.funny_files
217 self
.common_dirs
.sort()
218 print 'Common subdirectories :', self
.common_dirs
219 if self
.common_funny
:
220 self
.common_funny
.sort()
221 print 'Common funny cases :', self
.common_funny
223 def report_partial_closure(self
): # Print reports on self and on subdirs
225 for sd
in self
.subdirs
.itervalues():
229 def report_full_closure(self
): # Report on self and subdirs recursively
231 for sd
in self
.subdirs
.itervalues():
233 sd
.report_full_closure()
# Maps each lazily computed attribute name to the phase method that
# assigns it; __getattr__ consults this table on a failed lookup.
methodmap = {
    'subdirs': phase4,
    'same_files': phase3, 'diff_files': phase3, 'funny_files': phase3,
    'common_dirs': phase2, 'common_files': phase2, 'common_funny': phase2,
    'common': phase1, 'left_only': phase1, 'right_only': phase1,
    'left_list': phase0, 'right_list': phase0,
}
241 def __getattr__(self
, attr
):
242 if attr
not in self
.methodmap
:
243 raise AttributeError, attr
244 self
.methodmap
[attr
](self
)
245 return getattr(self
, attr
)
247 def cmpfiles(a
, b
, common
, shallow
=1, use_statcache
=None):
248 """Compare common files in two directories.
250 a, b -- directory names
251 common -- list of file names found in both directories
252 shallow -- if true, do comparison based solely on stat() information
253 use_statcache -- obsolete argument
255 Returns a tuple of three lists:
256 files that compare equal
257 files that are different
258 filenames that aren't regular files.
261 if use_statcache
is not None:
262 warnings
.warn("use_statcache argument is deprecated",
266 ax
= os
.path
.join(a
, x
)
267 bx
= os
.path
.join(b
, x
)
268 res
[_cmp(ax
, bx
, shallow
)].append(x
)
276 # 2 for funny cases (can't stat, etc.)
278 def _cmp(a
, b
, sh
, abs=abs, cmp=cmp):
280 return not abs(cmp(a
, b
, sh
))
285 # Return a copy with items that occur in skip removed.
287 def _filter(flist
, skip
):
288 return list(ifilterfalse(skip
.__contains
__, flist
))
291 # Demonstration and testing.
296 options
, args
= getopt
.getopt(sys
.argv
[1:], 'r')
298 raise getopt
.GetoptError('need exactly two args', None)
299 dd
= dircmp(args
[0], args
[1])
300 if ('-r', '') in options
:
301 dd
.report_full_closure()
305 if __name__
== '__main__':