py-cvs-rel2_1 (Rev 1.2) merge
[python/dscho.git] / Lib / distutils / filelist.py
blob211b65f8d2f4e6cc959c8705c7bdb90e0b2c2bec
1 """distutils.filelist
3 Provides the FileList class, used for poking about the filesystem
4 and building lists of files.
5 """
7 # created 2000/07/17, Rene Liebscher (as template.py)
8 # most parts taken from commands/sdist.py
9 # renamed 2000/07/29 (to filelist.py) and officially added to
10 # the Distutils source, Greg Ward
12 __revision__ = "$Id$"
14 import sys, os, string, re
15 import fnmatch
16 from types import *
17 from glob import glob
18 from distutils.util import convert_path
19 from distutils.errors import DistutilsTemplateError, DistutilsInternalError
class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      warn
        callable taking one message string; called when a template
        pattern matches nothing
      debug_print
        callable taking one message string; called with debugging chatter
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until 'set_allfiles()' or 'findall()'
        has been called
    """

    def __init__(self, warn=None, debug_print=None):
        """Create an empty file list.

        'warn' and 'debug_print', if given, replace the fallback
        reporting functions defined below.
        """
        # use standard warning and debug functions if no other given
        self.warn = warn or self.__warn
        self.debug_print = debug_print or self.__debug_print

        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate filenames."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Fill 'self.allfiles' by calling the module-level 'findall()'
        on 'dir'.
        """
        self.allfiles = findall(dir)

    # -- Fallback warning/debug functions ------------------------------

    def __warn(self, msg):
        """Write 'msg' to stderr, prefixed with "warning: "."""
        sys.stderr.write("warning: %s\n" % msg)

    def __debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.core import DEBUG
        if DEBUG:
            # call form: valid both as a Python 2 statement and a
            # Python 3 function call
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add the single filename 'item' to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every filename in 'items' to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory, basename) pairs; 'self.files'
        is rebound to a new list.
        """
        # Not a strict lexical sort!
        sortable_files = [os.path.split(name) for name in self.files]
        sortable_files.sort()
        self.files = [os.path.join(*parts) for parts in sortable_files]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Drop adjacent duplicate entries from 'self.files' in place."""
        # Assumes list has been sorted!
        # Walk backwards so deleting an entry does not disturb the
        # indices still to be visited.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split a template line into (action, patterns, dir, dir_pattern).

        'action' is always set; of the other three, only the ones that
        make sense for that action are set and the rest are None.  Raise
        DistutilsTemplateError if the line is blank, names an unknown
        action, or has the wrong number of words for its action.
        """
        words = line.split()
        if not words:
            # an all-whitespace line used to die with IndexError on
            # 'words[0]'; report it like any other malformed line
            raise DistutilsTemplateError(
                "blank template line: expected an action")
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                    "'%s' expects <pattern1> <pattern2> ..." % action)

            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                    "'%s' expects <dir> <pattern1> <pattern2> ..." % action)

            dir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                    "'%s' expects a single <dir_pattern>" % action)

            dir_pattern = convert_path(words[1])

        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    # _parse_template_line ()

    def process_template_line(self, line):
        """Parse one template line and apply it to 'self.files'.

        Calls 'self.warn' for every pattern that matches nothing.
        Errors raised by '_parse_template_line()' propagate unchanged.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    self.warn("no files found matching '%s'" % pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    self.warn(
                        "no previously-included files found matching '%s'" %
                        pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    self.warn(("no files found matching '%s' " +
                               "anywhere in distribution") %
                              pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    self.warn(("no previously-included files matching '%s' " +
                               "found anywhere in distribution") %
                              pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    self.warn(("no files found matching '%s' " +
                               "under directory '%s'") %
                              (pattern, dir))

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    self.warn(("no previously-included files matching '%s' " +
                               "found under directory '%s'") %
                              (pattern, dir))

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            # a pattern of None means "everything under the prefix"
            if not self.include_pattern(None, prefix=dir_pattern):
                self.warn("no directories found matching '%s'" % dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                self.warn(("no previously-included directories found " +
                           "matching '%s'") %
                          dir_pattern)
        else:
            raise DistutilsInternalError(
                "this cannot happen: invalid action '%s'" % action)

    # process_template_line ()

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?'  match non-special characters, where "special" is platform-
        dependent: slash on Unix; colon, slash, and backslash on
        DOS/Windows; and colon on Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.  If 'self.allfiles'
        has not been set yet, it is filled in first by calling
        'self.findall()'.

        Return 1 if files are found, 0 otherwise.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = 1

        return files_found

    # include_pattern ()

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return 1 if files are found, 0 otherwise.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Walk backwards so deleting an entry does not disturb the
        # indices still to be visited.
        for i in range(len(self.files) - 1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = 1

        return files_found

    # exclude_pattern ()

# class FileList
285 # ----------------------------------------------------------------------
286 # Utility functions
def findall(dir=os.curdir):
    """Find all regular files under 'dir' and return their names as a list.

    Each name is prefixed with 'dir', except when 'dir' is os.curdir, in
    which case names are left bare (no leading "./").  Entries that
    cannot be stat'ed -- typically dangling symbolic links -- are
    skipped rather than aborting the whole scan.  Errors from listing a
    directory are not caught.
    """
    from stat import ST_MODE, S_ISREG, S_ISDIR

    found = []
    pending = [dir]             # directories still to be scanned

    while pending:
        current = pending.pop()

        for name in os.listdir(current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join(current, name)
            else:
                fullname = name

            # Avoid excess stat calls -- just one will do, thank you!
            try:
                mode = os.stat(fullname)[ST_MODE]
            except OSError:
                # os.stat() follows symlinks, so a link whose target is
                # missing (or an entry removed since listdir) lands here
                continue

            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                # No S_ISLNK test: because os.stat() follows links, the
                # mode it reports is never that of a symlink.
                pending.append(fullname)

    return found
def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' and '?' do not match "special characters" (which are
    platform-specific).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters.
    # XXX currently the "special characters" are just slash -- i.e. this is
    # Unix-only.
    #
    # A dot is "non-escaped" when it is preceded by an even number of
    # backslashes (possibly zero).  The lookbehind checks that without
    # consuming the preceding character, so adjacent dots (from "??" or
    # "?*") are each rewritten; consuming it would skip every second one.
    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^/]', pattern_re)
    return pattern_re

# glob_to_re ()
def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.  If 'is_regex' true,
    then 'pattern' is directly compiled to a regex (if it's a string)
    or just returned as-is (assumes it's a regex object).

    Otherwise 'pattern' (which may be None or empty, meaning "no
    pattern") is translated with 'glob_to_re()'.  If 'prefix' is given,
    the result matches names that start with 'prefix' (itself a glob),
    followed by a path separator, anything, and then 'pattern'; 'anchor'
    is ignored in that case.  Without 'prefix', a true 'anchor' pins the
    match to the start of the name.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        return pattern

    if pattern:
        pattern_re = glob_to_re(pattern)
    else:
        pattern_re = ''

    if prefix is not None:
        # What fnmatch.translate() wraps around the pattern body differs
        # between Python versions (a bare trailing "$", or a leading
        # group opener plus a trailing end-of-string anchor).  Translate
        # a one-letter probe to learn the wrapper instead of assuming it
        # is exactly one trailing character.
        start, end = glob_to_re('a').split('a', 1)

        prefix_re = glob_to_re(prefix)
        prefix_re = prefix_re[len(start):len(prefix_re) - len(end)]
        # slicing the empty "no pattern" string safely yields ''
        pattern_body = pattern_re[len(start):len(pattern_re) - len(end)]

        # The separator is spliced into a regex, so a backslash must be
        # escaped; os.path.join() would insert it raw.
        sep = os.sep
        if sep == '\\':
            sep = r'\\'
        pattern_re = "^" + start + prefix_re + sep + ".*" + pattern_body + end
    else:                               # no prefix -- respect anchor flag
        if anchor:
            pattern_re = "^" + pattern_re

    return re.compile(pattern_re)

# translate_pattern ()