Added 'list_only' option (and modified 'run()' to respect it).
[python/dscho.git] / Lib / distutils / command / dist.py
blob3f309745ad0d533dda978a39f07479dd00567c54
1 """distutils.command.dist
3 Implements the Distutils 'dist' command (create a source distribution)."""
5 # created 1999/09/22, Greg Ward
7 __rcsid__ = "$Id$"
9 import sys, os, string, re
10 import fnmatch
11 from types import *
12 from glob import glob
13 from distutils.core import Command
14 from distutils.text_file import TextFile
17 # Possible modes of operation:
18 # - require an explicit manifest that lists every single file (presumably
19 # along with a way to auto-generate the manifest)
20 # - require an explicit manifest, but allow it to have globs or
21 # filename patterns of some kind (and also have auto-generation)
22 # - allow an explicit manifest, but automatically augment it at runtime
23 # with the source files mentioned in 'packages', 'py_modules', and
24 # 'ext_modules' (and any other such things that might come along)
26 # I'm liking the third way. Possible gotchas:
27 # - redundant specification: 'packages' includes 'foo' and manifest
28 # includes 'foo/*.py'
29 # - obvious conflict: 'packages' includes 'foo' and manifest
30 # includes '! foo/*.py' (can't imagine why you'd want this)
31 # - subtle conflict: 'packages' includes 'foo' and manifest
32 # includes '! foo/bar.py' (this could well be desired: eg. exclude
33 # an experimental module from distribution)
35 # Syntax for the manifest file:
36 # - if a line is just a Unix-style glob by itself, it's a "simple include
37 # pattern": go find all files that match and add them to the list
38 # of files
39 # - if a line is a glob preceded by "!", then it's a "simple exclude
40 # pattern": go over the current list of files and exclude any that
41 # match the glob pattern
42 # - if a line consists of a directory name followed by zero or more
43 # glob patterns, then we'll recursively explore that directory tree
44 # - the glob patterns can be include (no punctuation) or exclude
45 # (prefixed by "!", no space)
46 # - if no patterns given or the first pattern is not an include pattern,
47 # then assume "*" -- ie. find everything (and then start applying
48 # the rest of the patterns)
49 # - the patterns are given in order of increasing precedence, ie.
50 # the *last* one to match a given file applies to it
52 # example (ignoring auto-augmentation!):
53 # distutils/*.py
54 # distutils/command/*.py
55 # ! distutils/bleeding_edge.py
56 # examples/*.py
57 # examples/README
59 # smarter way (that *will* include distutils/command/bleeding_edge.py!)
60 # distutils *.py
61 # ! distutils/bleeding_edge.py
62 # examples !*~ !*.py[co] (same as: examples * !*~ !*.py[co])
63 # test test_* *.txt !*~ !*.py[co]
64 # README
65 # setup.py
67 # The actual Distutils manifest (don't need to mention source files,
68 # README, setup.py -- they're automatically distributed!):
69 # examples !*~ !*.py[co]
70 # test !*~ !*.py[co]
72 # The algorithm that will make it work:
73 # files = stuff from 'packages', 'py_modules', 'ext_modules',
74 # plus README, setup.py, ... ?
75 # foreach pattern in manifest file:
76 # if simple-include-pattern: # "distutils/*.py"
77 # files.append (glob (pattern))
78 # elif simple-exclude-pattern: # "! distutils/foo*"
79 # xfiles = glob (pattern)
80 # remove all xfiles from files
81 # elif recursive-pattern: # "examples" (just a directory name)
82 # patterns = rest-of-words-on-line
83 # dir_files = list of all files under dir
84 # if patterns:
85 # if patterns[0] is an exclude-pattern:
86 # insert "*" at patterns[0]
87 # for file in dir_files:
88 # for dpattern in reverse (patterns):
89 # if file matches dpattern:
90 # if dpattern is an include-pattern:
91 # files.append (file)
92 # else:
93 # nothing, don't include it
94 # next file
95 # else:
96 # files.extend (dir_files) # ie. accept all of them
99 # Anyways, this is all implemented below -- BUT it is largely untested; I
100 # know it works for the simple case of distributing the Distutils, but
101 # haven't tried it on more complicated examples. Undoubtedly doing so will
102 # reveal bugs and cause delays, so I'm waiting until after I've released
103 # Distutils 0.1.
106 # Other things we need to look for in creating a source distribution:
107 # - make sure there's a README
108 # - make sure the distribution meta-info is supplied and non-empty
109 # (*must* have name, version, ((author and author_email) or
110 # (maintainer and maintainer_email)), url)
112 # Frills:
113 # - make sure the setup script is called "setup.py"
114 # - make sure the README refers to "setup.py" (ie. has a line matching
115 # /^\s*python\s+setup\.py/)
117 # A crazy idea that conflicts with having/requiring 'version' in setup.py:
118 # - make sure there's a version number in the "main file" (main file
119 # is __init__.py of first package, or the first module if no packages,
120 # or the first extension module if no pure Python modules)
121 # - XXX how do we look for __version__ in an extension module?
122 # - XXX do we import and look for __version__? or just scan source for
123 # /^__version__\s*=\s*"[^"]+"/ ?
124 # - what about 'version_from' as an alternative to 'version' -- then
125 # we know just where to search for the version -- no guessing about
126 # what the "main file" is
class Dist (Command):
    """Implements the Distutils 'dist' command (create a source
    distribution).

    'run()' builds the list of files to distribute in 'self.files' --
    first the automatically found ones ('find_defaults()'), then
    whatever the manifest file adds or removes ('read_manifest()') --
    and either prints that list (if 'list_only' is set) or packs the
    files into one archive per requested format.
    """

    options = [('formats=', 'f',
                "formats for source distribution (tar, ztar, gztar, or zip)"),
               ('manifest=', 'm',
                "name of manifest file"),
               ('list-only', 'l',
                "just list files that would be distributed"),
              ]

    # archive format used when 'formats' isn't supplied, keyed on 'os.name'
    default_format = { 'posix': 'gztar',
                       'nt': 'zip' }

    exclude_re = re.compile (r'\s*!\s*(\S+)') # for manifest lines


    def set_default_options (self):
        """Set every option of this command to its "not supplied" value."""
        self.formats = None
        self.manifest = None
        self.list_only = 0


    def set_final_options (self):
        """Fill in whatever options were left unset.

        'formats' ends up as a list of format names: the platform
        default if nothing was given, or the comma-separated string
        split into its parts.  'manifest' defaults to "MANIFEST".

        Raises DistutilsPlatformError if 'formats' was not given and
        'default_format' has no entry for 'os.name'.
        """
        if self.formats is None:
            try:
                self.formats = [self.default_format[os.name]]
            except KeyError:
                # not among the names imported at the top of this
                # module, so fetch it where it's needed
                from distutils.errors import DistutilsPlatformError
                raise DistutilsPlatformError (
                    "don't know how to build source distributions on " +
                    "%s platform" % os.name)
        elif type (self.formats) is StringType:
            self.formats = string.split (self.formats, ',')

        if self.manifest is None:
            self.manifest = "MANIFEST"


    def run (self):
        """Check the meta-data, build the file list, and then either
        print it ('list_only') or create the distribution."""

        self.check_metadata ()

        self.files = []
        self.find_defaults ()
        self.read_manifest ()

        if self.list_only:
            for f in self.files:
                print (f)

        else:
            self.make_distribution ()


    def check_metadata (self):
        """Warn about meta-data missing from the distribution: 'name',
        'version' and 'url', plus either author/author_email or
        maintainer/maintainer_email.  Only warns; never fails."""

        dist = self.distribution

        missing = []
        for attr in ('name', 'version', 'url'):
            if not (hasattr (dist, attr) and getattr (dist, attr)):
                missing.append (attr)

        if missing:
            self.warn ("missing required meta-data: " +
                       string.join (missing, ", "))

        if dist.author:
            if not dist.author_email:
                self.warn ("missing meta-data: if 'author' supplied, " +
                           "'author_email' must be supplied too")
        elif dist.maintainer:
            if not dist.maintainer_email:
                self.warn ("missing meta-data: if 'maintainer' supplied, " +
                           "'maintainer_email' must be supplied too")
        else:
            self.warn ("missing meta-data: either author (and author_email) " +
                       "or maintainer (and maintainer_email) " +
                       "must be supplied")

    # check_metadata ()


    def find_defaults (self):
        """Add the automatically distributed files to 'self.files':
        README and setup.py (warning if either is absent), anything
        matching 'test/test*.py', and the source files reported by the
        'build_py' and 'build_ext' commands."""

        standards = ['README', 'setup.py']
        for fn in standards:
            if os.path.exists (fn):
                self.files.append (fn)
            else:
                self.warn ("standard file %s not found" % fn)

        optional = ['test/test*.py']
        for pattern in optional:
            files = glob (pattern)
            if files:
                self.files.extend (files)

        if self.distribution.packages or self.distribution.py_modules:
            build_py = self.find_peer ('build_py')
            build_py.ensure_ready ()
            self.files.extend (build_py.get_source_files ())

        if self.distribution.ext_modules:
            build_ext = self.find_peer ('build_ext')
            build_ext.ensure_ready ()
            self.files.extend (build_ext.get_source_files ())


    def open_manifest (self, filename):
        """Return a TextFile on 'filename' set up for reading manifest
        lines (see the keyword arguments for the options enabled)."""
        return TextFile (filename,
                         strip_comments=1,
                         skip_blanks=1,
                         join_lines=1,
                         lstrip_ws=1,
                         rstrip_ws=1,
                         collapse_ws=1)


    def search_dir (self, dir, patterns):
        """Return the names found under 'dir' (by 'findall()') that
        'patterns' selects.

        'patterns' is a list of glob patterns; one starting with "!"
        excludes, any other includes.  Later patterns take precedence
        over earlier ones, and a name matching no pattern is left out.
        If the first pattern is an exclude, "*" is assumed in front of
        it.  With no patterns at all, everything found is returned.
        The caller's list is not modified.
        """

        allfiles = findall (dir)
        if not patterns:
            return allfiles

        if patterns[0][0] == "!":       # starts with an exclude spec?
            patterns = ["*"] + patterns # then accept anything that isn't
                                        # explicitly excluded (new list:
                                        # leave the caller's one alone)

        act_patterns = []               # "action-patterns": (include,regexp)
                                        # tuples where include is a boolean
        for pattern in patterns:
            if pattern[0] == '!':
                act_patterns.append \
                    ((0, re.compile (fnmatch.translate (pattern[1:]))))
            else:
                act_patterns.append \
                    ((1, re.compile (fnmatch.translate (pattern))))

        # highest-precedence (ie. last-listed) pattern gets tried first
        act_patterns.reverse ()

        files = []
        for file in allfiles:
            for (include,regexp) in act_patterns:
                if regexp.match (file):
                    if include:
                        files.append (file)
                    break               # continue to next file

        return files

    # search_dir ()


    def exclude_files (self, pattern):
        """Delete from 'self.files' (in place) every name that matches
        the glob 'pattern'; return the number of names deleted."""

        regexp = re.compile (fnmatch.translate (pattern))
        removed = 0
        # walk backwards so deleting doesn't disturb indices yet to come
        for i in range (len (self.files)-1, -1, -1):
            if regexp.match (self.files[i]):
                del self.files[i]
                removed = removed + 1
        return removed


    def read_manifest (self):
        """Read the manifest file named by 'self.manifest' and apply
        each of its lines to 'self.files'.  Problems with a line are
        reported through the manifest's 'warn()' and the line is
        skipped."""

        # self.files had better already be defined (and hold the
        # "automatically found" files -- Python modules and extensions,
        # README, setup script, ...)
        assert self.files is not None

        manifest = self.open_manifest (self.manifest)
        while 1:

            pattern = manifest.readline()
            if pattern is None:         # end of file
                break

            # Cases:
            #   1) simple-include: "*.py", "foo/*.py", "doc/*.html", "FAQ"
            #   2) simple-exclude: same, prefaced by !
            #   3) recursive: multi-word line, first word a directory

            exclude = self.exclude_re.match (pattern)
            if exclude:
                pattern = exclude.group (1)

            words = string.split (pattern)
            assert words                # must have something!
            if os.name != 'posix':
                words[0] = apply (os.path.join, string.split (words[0], '/'))

            # First word is a directory, possibly with include/exclude
            # patterns making up the rest of the line: it's a recursive
            # pattern
            if os.path.isdir (words[0]):
                if exclude:
                    manifest.warn ("exclude (!) doesn't apply to " +
                                   "whole directory trees")
                    continue

                dir_files = self.search_dir (words[0], words[1:])
                self.files.extend (dir_files)

            # Multiple words in pattern: that's a no-no unless the first
            # word is a directory name
            elif len (words) > 1:
                manifest.warn ("can't have multiple words unless first word " +
                               "('%s') is a directory name" % words[0])
                continue

            # Single word, no bang: it's a "simple include pattern"
            elif not exclude:
                matches = glob (pattern)
                if matches:
                    self.files.extend (matches)
                else:
                    manifest.warn ("no matches for '%s' found" % pattern)

            # Single word prefixed with a bang: it's a "simple exclude pattern"
            else:
                if self.exclude_files (pattern) == 0:
                    manifest.warn ("no files excluded by '%s'" % pattern)

            # if/elif/.../else on 'pattern'

        # loop over lines of 'manifest'

        manifest.close ()

    # read_manifest ()


    def make_release_tree (self, base_dir, files):
        """Create directory 'base_dir' holding a hard link to each name
        in 'files' (at the same relative path), making subdirectories
        as needed.  Links that already exist are left alone."""

        # XXX this is Unix-specific

        # First get the list of directories to create
        need_dir = {}
        for filename in files:
            need_dir[os.path.join (base_dir, os.path.dirname (filename))] = 1
        need_dirs = need_dir.keys()
        need_dirs.sort()

        # Now create them
        for dir in need_dirs:
            self.mkpath (dir)

        # And walk over the list of files, making a hard link for
        # each one that doesn't already exist in its corresponding
        # location under 'base_dir'

        self.announce ("making hard links in %s..." % base_dir)
        for filename in files:
            dest = os.path.join (base_dir, filename)
            if not os.path.exists (dest):
                self.execute (os.link, (filename, dest),
                              "linking %s -> %s" % (filename, dest))
    # make_release_tree ()


    def make_tarball (self, base_dir):
        """Run 'tar' to pack directory 'base_dir' into the gzipped
        tarball 'base_dir' + ".tar.gz"."""

        # XXX GNU tar 1.13 has a nifty option to add a prefix directory.
        # It's pretty new, though, so we certainly can't require it -- but
        # it would be nice to take advantage of it to skip the "create a
        # tree of hardlinks" step!

        # But I am a lazy bastard, so I require GNU tar anyways.

        archive_name = base_dir + ".tar.gz"
        self.spawn (["tar", "-czf", archive_name, base_dir])


    def make_zipfile (self, base_dir):
        """Run 'zip' to pack directory 'base_dir' recursively into an
        archive named after 'base_dir'."""

        # This assumes the Unix 'zip' utility -- it could be easily recast
        # to use pkzip (or whatever the command-line zip creation utility
        # on Redmond's archaic CP/M knockoff is nowadays), but I'll let
        # someone who can actually test it do that.

        self.spawn (["zip", "-r", base_dir, base_dir])


    def make_distribution (self):
        """Build the release tree for 'self.files' and create one
        archive from it per supported format in 'self.formats'.  The
        tree is named "<name>-<version>", or just "<name>" if there is
        no version.  Formats this command can't create are warned about
        and skipped."""

        # Don't warn about missing meta-data here -- should be done
        # elsewhere.
        name = self.distribution.name or "UNKNOWN"
        version = self.distribution.version

        if version:
            base_dir = "%s-%s" % (name, version)
        else:
            base_dir = name

        # Remove any files that match "base_dir" from the fileset -- we
        # don't want to go distributing the distribution inside itself!
        self.exclude_files (base_dir + "*")

        # Only 'gztar' and 'zip' are implemented below; say so rather
        # than quietly producing nothing for anything else.
        for fmt in self.formats:
            if fmt not in ('gztar', 'zip'):
                self.warn ("don't know how to create '%s' archives" % fmt)

        self.make_release_tree (base_dir, self.files)
        if 'gztar' in self.formats:
            self.make_tarball (base_dir)
        if 'zip' in self.formats:
            self.make_zipfile (base_dir)

# class Dist
436 # ----------------------------------------------------------------------
437 # Utility functions
def findall (dir = os.curdir):
    """Find all files under 'dir' and return the sorted list of their
    names, each one prefixed with 'dir' (ie. built with 'os.path.join()'
    from 'dir' downwards).

    Directories are descended into but are not themselves part of the
    result.  A symbolic link to a directory is not followed; it is
    listed like any other non-directory entry.
    """

    found = []
    stack = [dir]                       # directories still to be scanned

    while stack:
        dir = stack.pop ()

        for name in os.listdir (dir):
            fullname = os.path.join (dir, name)
            if os.path.isdir (fullname) and not os.path.islink (fullname):
                stack.append (fullname)
            else:
                found.append (fullname)

    found.sort ()
    return found
465 # ======================================================================
466 # Here follows some extensive mental masturbation about how to
467 # make the manifest file and search algorithm even more complex.
468 # I think this is all gratuitous, really.
470 # Hmm, something extra: want to apply an exclude pattern over a whole
471 # subtree without necessarily having to explicitly include files from it,
472 # ie. it should apply after gathering files by other means (simple
473 # include pattern)
474 # . !*~ !*.bak !#*#
475 # and we also want to prune at certain directories:
476 # . !RCS !CVS
477 # which again should apply globally.
479 # possible solution:
480 # - exclude pattern in a directory applies to all files found under that
481 # directory
482 # - subdirectories that match an exclude pattern will be pruned
483 # - hmmm, to be consistent, subdirectories that match an include
484 # pattern should be recursively included
485 # - and this should apply to "simple" patterns too
487 # thus:
489 # examples/
491 # means get everything in examples/ and all subdirs;
493 # examples/ !*~ !#*# !*.py[co]
495 # means get everything under examples/ except files matching those three globs;
497 # ./ !RCS !CVS
499 # means get everything under current dir, but prune RCS/CVS directories;
501 # ./ !*~ !#*# !*.py[co] !RCS !CVS
502 # ! build/
503 # ! experimental/
505 # means get everything under the distribution directory except the usual
506 # excludes at all levels; exclude "build" and "experimental" under the
507 # distribution dir only.
509 # Do the former examples still work?
511 # distutils/ *.py
512 # ! distutils/bleeding_edge.py
514 # means all .py files recursively found under distutils, except for the one
515 # explicitly named.
517 # distutils/ *.py !bleeding_edge.py
519 # means the same, except bleeding_edge.py will be excluded wherever it's
520 # found -- thus this can exclude up to one file per directory under
521 # distutils.
523 # distutils/*.py
524 # ! distutils/bleeding_edge.py
526 # gets exactly distutils/*.py, minus the one explicitly mentioned exclude, and
528 # distutils/*.py
529 # distutils/ !bleeding_edge.py
531 # coincidentally does the same, but only because there can only be one file
532 # that matches the exclude pattern. Oh, we'd still like
534 # distutils *.py !bleeding*.py
535 # distutils/bleeding_ledge.py
537 # to include distutils/bleeding_ledge.py -- i.e. it should override the
538 # earlier exclude pattern by virtue of appearing later in the manifest. Does
539 # this conflict with the above requirements, ie. that "!RCS" and "!*~" should
540 # apply everywhere? Hmm, I think it doesn't have to, as long as we're smart
541 # about it. Consequence:
543 # . !RCS !CVS
544 # distutils *
546 # will go ahead and include RCS and CVS files under distutils, but
548 # distutils *
549 # . !RCS !CVS
551 # will do the right thing. Hmmm. I think that's OK, and an inevitable
552 # consequence of the ability to override exclusions.
554 # OK, new crack at the search algorithm.
556 # for pattern in manifest:
557 # if dir-pattern: # ie. first word is a directory (incl. "."!)
558 # dir = first word on line
559 # patterns = rest of line
560 # if patterns:
561 # for dpattern in patterns:
562 # if exclude-pattern:
563 # remove from files anything matching dpattern (including pruning
564 # subtrees rooted at directories that match dpattern)
565 # else:
566 # files.append (recursive_glob (dir, dpattern))
567 # else:
568 # files.append (recursive_glob (dir, '*'))
570 # elif include-pattern: # it's a "simple include pattern"
571 # files.append (glob (pattern))
573 # else: # it's a "simple exclude pattern"
574 # remove from files anything matching pattern
576 # The two removal algorithms might be a bit tricky:
578 # "remove simple exclude pattern":
579 # for f in files:
580 # if f matches pattern:
581 # delete it
583 # "remove recursive exclude pattern":
584 # for f in files:
586 # t = tail (f)
587 # while t:
588 # if t matches pattern:
589 # delete current file
590 # continue
591 # t = tail (t)
593 # Well, that was an interesting mental exercise. I'm not completely
594 # convinced it will work, nor am I convinced this level of complexity
595 # is necessary. If you want to exclude RCS or CVS directories, just
596 # don't bloody include them!