1 """Utilities for comparing files and directories.
7 cmp(f1, f2, shallow=1, use_statcache=0) -> int
8 cmpfiles(a, b, common) -> ([], [], [])
16 __all__
= ["cmp","dircmp","cmpfiles"]
def cmp(f1, f2, shallow=1, use_statcache=0):
    """Compare two files.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    shallow -- Just check stat signature (do not read the files).

    use_statcache -- Do not stat() each file directly: go through
                     the statcache module for more efficiency.

    Return value:

    True if the files are the same, False otherwise.

    This function uses a cache for past comparisons and the results,
    with a cache invalidation mechanism relying on stale signatures.
    Of course, if 'use_statcache' is true, this mechanism is defeated,
    and the cache will never grow stale.

    """
    if use_statcache:
        stat_function = statcache.stat
    else:
        stat_function = os.stat
    s1 = _sig(stat_function(f1))
    s2 = _sig(stat_function(f2))
    # Element 0 of a signature is the file type; only two regular files
    # can ever compare equal.
    if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG:
        return False
    # In shallow mode matching signatures are accepted without reading.
    if shallow and s1 == s2:
        return True

    # A cached entry is (sig1, sig2, outcome); it is only trusted while
    # both stored signatures still match the current ones.
    result = _cache.get((f1, f2))
    if result and (s1, s2) == result[:2]:
        return result[2]
    outcome = _do_cmp(f1, f2)
    _cache[f1, f2] = s1, s2, outcome
    return outcome
67 return (stat
.S_IFMT(st
.st_mode
),
76 b1
= fp1
.read(bufsize
)
77 b2
= fp2
.read(bufsize
)
83 # Directory comparison class.
86 """A class that manages the comparison of 2 directories.
88 dircmp(a,b,ignore=None,hide=None)
89 A and B are directories.
90 IGNORE is a list of names to ignore,
91 defaults to ['RCS', 'CVS', 'tags'].
92 HIDE is a list of names to hide,
93 defaults to [os.curdir, os.pardir].
96 x = dircmp(dir1, dir2)
97 x.report() -> prints a report on the differences between dir1 and dir2
99 x.report_partial_closure() -> prints report on differences between dir1
100 and dir2, and reports on common immediate subdirectories.
101 x.report_full_closure() -> like report_partial_closure,
105 left_list, right_list: The files in dir1 and dir2,
106 filtered by hide and ignore.
107 common: a list of names in both dir1 and dir2.
108 left_only, right_only: names only in dir1, dir2.
109 common_dirs: subdirectories in both dir1 and dir2.
110 common_files: files in both dir1 and dir2.
111 common_funny: names in both dir1 and dir2 where the type differs between
112 dir1 and dir2, or the name is not stat-able.
113 same_files: list of identical files.
114 diff_files: list of filenames which differ.
115 funny_files: list of files which could not be compared.
116 subdirs: a dictionary of dircmp objects, keyed by names in common_dirs.
119 def __init__(self
, a
, b
, ignore
=None, hide
=None): # Initialize
123 self
.hide
= [os
.curdir
, os
.pardir
] # Names never to be shown
127 self
.ignore
= ['RCS', 'CVS', 'tags'] # Names ignored in comparison
def phase0(self):
    """List both directories, dropping hidden and ignored names.

    Sets self.left_list and self.right_list to the sorted entries of
    self.left and self.right, with every name found in self.hide or
    self.ignore removed.
    """
    # Build the combined skip list once; it is the same for both sides.
    skip = self.hide + self.ignore
    self.left_list = _filter(os.listdir(self.left), skip)
    self.right_list = _filter(os.listdir(self.right), skip)
    self.left_list.sort()
    self.right_list.sort()
139 __p4_attrs
= ('subdirs',)
140 __p3_attrs
= ('same_files', 'diff_files', 'funny_files')
141 __p2_attrs
= ('common_dirs', 'common_files', 'common_funny')
142 __p1_attrs
= ('common', 'left_only', 'right_only')
143 __p0_attrs
= ('left_list', 'right_list')
145 def __getattr__(self
, attr
):
146 if attr
in self
.__p
4_attrs
:
148 elif attr
in self
.__p
3_attrs
:
150 elif attr
in self
.__p
2_attrs
:
152 elif attr
in self
.__p
1_attrs
:
154 elif attr
in self
.__p
0_attrs
:
157 raise AttributeError, attr
158 return getattr(self
, attr
)
160 def phase1(self
): # Compute common names
161 a_only
, b_only
= [], []
164 for fnm
in self
.right_list
:
166 for x
in self
.left_list
:
171 for x
in self
.right_list
:
176 self
.common
= common
.keys()
177 self
.left_only
= a_only
178 self
.right_only
= b_only
180 def phase2(self
): # Distinguish files, directories, funnies
181 self
.common_dirs
= []
182 self
.common_files
= []
183 self
.common_funny
= []
185 for x
in self
.common
:
186 a_path
= os
.path
.join(self
.left
, x
)
187 b_path
= os
.path
.join(self
.right
, x
)
191 a_stat
= statcache
.stat(a_path
)
192 except os
.error
, why
:
193 # print 'Can\'t stat', a_path, ':', why[1]
196 b_stat
= statcache
.stat(b_path
)
197 except os
.error
, why
:
198 # print 'Can\'t stat', b_path, ':', why[1]
202 a_type
= stat
.S_IFMT(a_stat
.st_mode
)
203 b_type
= stat
.S_IFMT(b_stat
.st_mode
)
205 self
.common_funny
.append(x
)
206 elif stat
.S_ISDIR(a_type
):
207 self
.common_dirs
.append(x
)
208 elif stat
.S_ISREG(a_type
):
209 self
.common_files
.append(x
)
211 self
.common_funny
.append(x
)
213 self
.common_funny
.append(x
)
def phase3(self):
    """Find out differences between common files.

    Passes self.common_files to cmpfiles() and stores the three lists it
    returns in self.same_files, self.diff_files and self.funny_files,
    in that order.
    """
    self.same_files, self.diff_files, self.funny_files = cmpfiles(
        self.left, self.right, self.common_files)
def phase4(self):
    """Find out differences between common subdirectories.

    A new dircmp object is created for each name in self.common_dirs and
    stored in the self.subdirs dictionary, indexed by that name.  The
    hide and ignore properties are inherited from the parent.
    """
    self.subdirs = {}
    for x in self.common_dirs:
        a_x = os.path.join(self.left, x)
        b_x = os.path.join(self.right, x)
        self.subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide)
229 def phase4_closure(self
): # Recursively call phase4() on subdirectories
231 for sd
in self
.subdirs
.itervalues():
234 def report(self
): # Print a report on the differences between a and b
235 # Output format is purposely lousy
236 print 'diff', self
.left
, self
.right
238 self
.left_only
.sort()
239 print 'Only in', self
.left
, ':', self
.left_only
241 self
.right_only
.sort()
242 print 'Only in', self
.right
, ':', self
.right_only
244 self
.same_files
.sort()
245 print 'Identical files :', self
.same_files
247 self
.diff_files
.sort()
248 print 'Differing files :', self
.diff_files
250 self
.funny_files
.sort()
251 print 'Trouble with common files :', self
.funny_files
253 self
.common_dirs
.sort()
254 print 'Common subdirectories :', self
.common_dirs
255 if self
.common_funny
:
256 self
.common_funny
.sort()
257 print 'Common funny cases :', self
.common_funny
259 def report_partial_closure(self
): # Print reports on self and on subdirs
261 for sd
in self
.subdirs
.itervalues():
265 def report_full_closure(self
): # Report on self and subdirs recursively
267 for sd
in self
.subdirs
.itervalues():
269 sd
.report_full_closure()
def cmpfiles(a, b, common, shallow=1, use_statcache=0):
    """Compare common files in two directories.

    a, b -- directory names
    common -- list of file names found in both directories
    shallow -- if true, do comparison based solely on stat() information
    use_statcache -- if true, use statcache.stat() instead of os.stat()

    Returns a tuple of three lists:
      files that compare equal
      files that are different
      filenames that aren't regular files.

    """
    res = ([], [], [])
    for x in common:
        ax = os.path.join(a, x)
        bx = os.path.join(b, x)
        # _cmp() yields the index of the list this name belongs in.
        res[_cmp(ax, bx, shallow, use_statcache)].append(x)
    return res
# Compare two files.
# Return:
#       0 for equal
#       1 for different
#       2 for funny cases (can't stat, etc.)
#
def _cmp(a, b, sh, st):
    """Return the result-list index for the file pair (a, b).

    a, b -- file names
    sh, st -- forwarded to cmp() as its third and fourth arguments
    """
    try:
        # cmp() is true for equal files, so negating it gives 0 (False)
        # for equal and 1 (True) for different.
        return not abs(cmp(a, b, sh, st))
    except os.error:
        return 2
307 # Return a copy with items that occur in skip removed.
309 def _filter(list, skip
):
312 if item
not in skip
: result
.append(item
)
316 # Demonstration and testing.
321 options
, args
= getopt
.getopt(sys
.argv
[1:], 'r')
323 raise getopt
.error
, 'need exactly two args'
324 dd
= dircmp(args
[0], args
[1])
325 if ('-r', '') in options
:
326 dd
.report_full_closure()
330 if __name__
== '__main__':