Lib/distutils/util.py

   1 """distutils.util
   2
   3 Miscellaneous utility functions -- anything that doesn't fit into
   4 one of the other *util.py modules.
   5 """
   6
   7 # created 1999/03/08, Greg Ward
   8
   9 __revision__ = "$Id$"
  10
  11 import sys, os, string, re
  12 from distutils.errors import DistutilsPlatformError
  13 from distutils.dep_util import newer
  14 from distutils.spawn import spawn
  15
  16
  17 def get_platform ():
  18     """Return a string that identifies the current platform.  This is used
  19     mainly to distinguish platform-specific build directories and
  20     platform-specific built distributions.  Typically includes the OS name
  21     and version and the architecture (as supplied by 'os.uname()'),
  22     although the exact information included depends on the OS; eg. for IRIX
  23     the architecture isn't particularly important (IRIX only runs on SGI
  24     hardware), but for Linux the kernel version isn't particularly
  25     important.
  26
  27     Examples of returned values:
  28        linux-i586
  29        linux-alpha (?)
  30        solaris-2.6-sun4u
  31        irix-5.3
  32        irix64-6.2
  33
  34     For non-POSIX platforms, currently just returns 'sys.platform'.
  35     """
  36     if os.name != "posix" or not hasattr(os, 'uname'):
  37         # XXX what about the architecture? NT is Intel or Alpha,
  38         # Mac OS is M68k or PPC, etc.
  39         return sys.platform
  40
  41     # Try to distinguish various flavours of Unix
  42
  43     (osname, host, release, version, machine) = os.uname()
  44
  45     # Convert the OS name to lowercase and remove '/' characters
  46     # (to accommodate BSD/OS)
  47     osname = string.lower(osname)
  48     osname = string.replace(osname, '/', '')
  49
  50     if osname[:5] == "linux":
  51         # At least on Linux/Intel, 'machine' is the processor --
  52         # i386, etc.
  53         # XXX what about Alpha, SPARC, etc?
  54         return  "%s-%s" % (osname, machine)
  55     elif osname[:5] == "sunos":
  56         if release[0] >= "5":           # SunOS 5 == Solaris 2
  57             osname = "solaris"
  58             release = "%d.%s" % (int(release[0]) - 3, release[2:])
  59         # fall through to standard osname-release-machine representation
  60     elif osname[:4] == "irix":              # could be "irix64"!
  61         return "%s-%s" % (osname, release)
  62     elif osname[:3] == "aix":
  63         return "%s-%s.%s" % (osname, version, release)
  64     elif osname[:6] == "cygwin":
  65         osname = "cygwin"
  66         rel_re = re.compile (r'[\d.]+')
  67         m = rel_re.match(release)
  68         if m:
  69             release = m.group()
  70
  71     return "%s-%s-%s" % (osname, release, machine)
  72
  73 # get_platform ()
  74
  75
  76 def convert_path (pathname):
  77     """Return 'pathname' as a name that will work on the native filesystem,
  78     i.e. split it on '/' and put it back together again using the current
  79     directory separator.  Needed because filenames in the setup script are
  80     always supplied in Unix style, and have to be converted to the local
  81     convention before we can actually use them in the filesystem.  Raises
  82     ValueError on non-Unix-ish systems if 'pathname' either starts or
  83     ends with a slash.
  84     """
  85     if os.sep == '/':
  86         return pathname
  87     if pathname[0] == '/':
  88         raise ValueError, "path '%s' cannot be absolute" % pathname
  89     if pathname[-1] == '/':
  90         raise ValueError, "path '%s' cannot end with '/'" % pathname
  91
  92     paths = string.split(pathname, '/')
  93     while '.' in paths:
  94         paths.remove('.')
  95     if not paths:
  96         return os.curdir
  97     return apply(os.path.join, paths)
  98
  99 # convert_path ()
 100
 101
 102 def change_root (new_root, pathname):
 103     """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
 104     relative, this is equivalent to "os.path.join(new_root,pathname)".
 105     Otherwise, it requires making 'pathname' relative and then joining the
 106     two, which is tricky on DOS/Windows and Mac OS.
 107     """
 108     if os.name == 'posix':
 109         if not os.path.isabs(pathname):
 110             return os.path.join(new_root, pathname)
 111         else:
 112             return os.path.join(new_root, pathname[1:])
 113
 114     elif os.name == 'nt':
 115         (drive, path) = os.path.splitdrive(pathname)
 116         if path[0] == '\\':
 117             path = path[1:]
 118         return os.path.join(new_root, path)
 119
 120     elif os.name == 'mac':
 121         if not os.path.isabs(pathname):
 122             return os.path.join(new_root, pathname)
 123         else:
 124             # Chop off volume name from start of path
 125             elements = string.split(pathname, ":", 1)
 126             pathname = ":" + elements[1]
 127             return os.path.join(new_root, pathname)
 128
 129     else:
 130         raise DistutilsPlatformError, \
 131               "nothing known about platform '%s'" % os.name
 132
 133
 134 _environ_checked = 0
 135 def check_environ ():
 136     """Ensure that 'os.environ' has all the environment variables we
 137     guarantee that users can use in config files, command-line options,
 138     etc.  Currently this includes:
 139       HOME - user's home directory (Unix only)
 140       PLAT - description of the current platform, including hardware
 141              and OS (see 'get_platform()')
 142     """
 143     global _environ_checked
 144     if _environ_checked:
 145         return
 146
 147     if os.name == 'posix' and not os.environ.has_key('HOME'):
 148         import pwd
 149         os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
 150
 151     if not os.environ.has_key('PLAT'):
 152         os.environ['PLAT'] = get_platform()
 153
 154     _environ_checked = 1
 155
 156
 157 def subst_vars (s, local_vars):
 158     """Perform shell/Perl-style variable substitution on 'string'.  Every
 159     occurrence of '$' followed by a name is considered a variable, and
 160     variable is substituted by the value found in the 'local_vars'
 161     dictionary, or in 'os.environ' if it's not in 'local_vars'.
 162     'os.environ' is first checked/augmented to guarantee that it contains
 163     certain values: see 'check_environ()'.  Raise ValueError for any
 164     variables not found in either 'local_vars' or 'os.environ'.
 165     """
 166     check_environ()
 167     def _subst (match, local_vars=local_vars):
 168         var_name = match.group(1)
 169         if local_vars.has_key(var_name):
 170             return str(local_vars[var_name])
 171         else:
 172             return os.environ[var_name]
 173
 174     try:
 175         return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
 176     except KeyError, var:
 177         raise ValueError, "invalid variable '$%s'" % var
 178
 179 # subst_vars ()
 180
 181
 182 def grok_environment_error (exc, prefix="error: "):
 183     """Generate a useful error message from an EnvironmentError (IOError or
 184     OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
 185     does what it can to deal with exception objects that don't have a
 186     filename (which happens when the error is due to a two-file operation,
 187     such as 'rename()' or 'link()'.  Returns the error message as a string
 188     prefixed with 'prefix'.
 189     """
 190     # check for Python 1.5.2-style {IO,OS}Error exception objects
 191     if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
 192         if exc.filename:
 193             error = prefix + "%s: %s" % (exc.filename, exc.strerror)
 194         else:
 195             # two-argument functions in posix module don't
 196             # include the filename in the exception object!
 197             error = prefix + "%s" % exc.strerror
 198     else:
 199         error = prefix + str(exc[-1])
 200
 201     return error
 202
 203
 204 # Needed by 'split_quoted()'
 205 _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
 206 _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
 207 _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
 208
 209 def split_quoted (s):
 210     """Split a string up according to Unix shell-like rules for quotes and
 211     backslashes.  In short: words are delimited by spaces, as long as those
 212     spaces are not escaped by a backslash, or inside a quoted string.
 213     Single and double quotes are equivalent, and the quote characters can
 214     be backslash-escaped.  The backslash is stripped from any two-character
 215     escape sequence, leaving only the escaped character.  The quote
 216     characters are stripped from any quoted string.  Returns a list of
 217     words.
 218     """
 219
 220     # This is a nice algorithm for splitting up a single string, since it
 221     # doesn't require character-by-character examination.  It was a little
 222     # bit of a brain-bender to get it working right, though...
 223
 224     s = string.strip(s)
 225     words = []
 226     pos = 0
 227
 228     while s:
 229         m = _wordchars_re.match(s, pos)
 230         end = m.end()
 231         if end == len(s):
 232             words.append(s[:end])
 233             break
 234
 235         if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
 236             words.append(s[:end])       # we definitely have a word delimiter
 237             s = string.lstrip(s[end:])
 238             pos = 0
 239
 240         elif s[end] == '\\':            # preserve whatever is being escaped;
 241                                         # will become part of the current word
 242             s = s[:end] + s[end+1:]
 243             pos = end+1
 244
 245         else:
 246             if s[end] == "'":           # slurp singly-quoted string
 247                 m = _squote_re.match(s, end)
 248             elif s[end] == '"':         # slurp doubly-quoted string
 249                 m = _dquote_re.match(s, end)
 250             else:
 251                 raise RuntimeError, \
 252                       "this can't happen (bad char '%c')" % s[end]
 253
 254             if m is None:
 255                 raise ValueError, \
 256                       "bad string (mismatched %s quotes?)" % s[end]
 257
 258             (beg, end) = m.span()
 259             s = s[:beg] + s[beg+1:end-1] + s[end:]
 260             pos = m.end() - 2
 261
 262         if pos >= len(s):
 263             words.append(s)
 264             break
 265
 266     return words
 267
 268 # split_quoted ()
 269
 270
 271 def execute (func, args, msg=None, verbose=0, dry_run=0):
 272     """Perform some action that affects the outside world (eg.  by writing
 273     to the filesystem).  Such actions are special because they are disabled
 274     by the 'dry_run' flag, and announce themselves if 'verbose' is true.
 275     This method takes care of all that bureaucracy for you; all you have to
 276     do is supply the function to call and an argument tuple for it (to
 277     embody the "external action" being performed), and an optional message
 278     to print.
 279     """
 280     # Generate a message if we weren't passed one
 281     if msg is None:
 282         msg = "%s%s" % (func.__name__, `args`)
 283         if msg[-2:] == ',)':        # correct for singleton tuple
 284             msg = msg[0:-2] + ')'
 285
 286     # Print it if verbosity level is high enough
 287     if verbose:
 288         print msg
 289
 290     # And do it, as long as we're not in dry-run mode
 291     if not dry_run:
 292         apply(func, args)
 293
 294 # execute()
 295
 296
 297 def strtobool (val):
 298     """Convert a string representation of truth to true (1) or false (0).
 299     True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
 300     are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
 301     'val' is anything else.
 302     """
 303     val = string.lower(val)
 304     if val in ('y', 'yes', 't', 'true', 'on', '1'):
 305         return 1
 306     elif val in ('n', 'no', 'f', 'false', 'off', '0'):
 307         return 0
 308     else:
 309         raise ValueError, "invalid truth value %s" % `val`
 310
 311
 312 def byte_compile (py_files,
 313                   optimize=0, force=0,
 314                   prefix=None, base_dir=None,
 315                   verbose=1, dry_run=0,
 316                   direct=None):
 317     """Byte-compile a collection of Python source files to either .pyc
 318     or .pyo files in the same directory.  'py_files' is a list of files
 319     to compile; any files that don't end in ".py" are silently skipped.
 320     'optimize' must be one of the following:
 321       0 - don't optimize (generate .pyc)
 322       1 - normal optimization (like "python -O")
 323       2 - extra optimization (like "python -OO")
 324     If 'force' is true, all files are recompiled regardless of
 325     timestamps.
 326
 327     The source filename encoded in each bytecode file defaults to the
 328     filenames listed in 'py_files'; you can modify these with 'prefix' and
 329     'basedir'.  'prefix' is a string that will be stripped off of each
 330     source filename, and 'base_dir' is a directory name that will be
 331     prepended (after 'prefix' is stripped).  You can supply either or both
 332     (or neither) of 'prefix' and 'base_dir', as you wish.
 333
 334     If 'verbose' is true, prints out a report of each file.  If 'dry_run'
 335     is true, doesn't actually do anything that would affect the filesystem.
 336
 337     Byte-compilation is either done directly in this interpreter process
 338     with the standard py_compile module, or indirectly by writing a
 339     temporary script and executing it.  Normally, you should let
 340     'byte_compile()' figure out to use direct compilation or not (see
 341     the source for details).  The 'direct' flag is used by the script
 342     generated in indirect mode; unless you know what you're doing, leave
 343     it set to None.
 344     """
 345
 346     # First, if the caller didn't force us into direct or indirect mode,
 347     # figure out which mode we should be in.  We take a conservative
 348     # approach: choose direct mode *only* if the current interpreter is
 349     # in debug mode and optimize is 0.  If we're not in debug mode (-O
 350     # or -OO), we don't know which level of optimization this
 351     # interpreter is running with, so we can't do direct
 352     # byte-compilation and be certain that it's the right thing.  Thus,
 353     # always compile indirectly if the current interpreter is in either
 354     # optimize mode, or if either optimization level was requested by
 355     # the caller.
 356     if direct is None:
 357         direct = (__debug__ and optimize == 0)
 358
 359     # "Indirect" byte-compilation: write a temporary script and then
 360     # run it with the appropriate flags.
 361     if not direct:
 362         from tempfile import mktemp
 363         script_name = mktemp(".py")
 364         if verbose:
 365             print "writing byte-compilation script '%s'" % script_name
 366         if not dry_run:
 367             script = open(script_name, "w")
 368
 369             script.write("""\
 370 from distutils.util import byte_compile
 371 files = [
 372 """)
 373
 374             # XXX would be nice to write absolute filenames, just for
 375             # safety's sake (script should be more robust in the face of
 376             # chdir'ing before running it).  But this requires abspath'ing
 377             # 'prefix' as well, and that breaks the hack in build_lib's
 378             # 'byte_compile()' method that carefully tacks on a trailing
 379             # slash (os.sep really) to make sure the prefix here is "just
 380             # right".  This whole prefix business is rather delicate -- the
 381             # problem is that it's really a directory, but I'm treating it
 382             # as a dumb string, so trailing slashes and so forth matter.
 383
 384             #py_files = map(os.path.abspath, py_files)
 385             #if prefix:
 386             #    prefix = os.path.abspath(prefix)
 387
 388             script.write(string.join(map(repr, py_files), ",\n") + "]\n")
 389             script.write("""
 390 byte_compile(files, optimize=%s, force=%s,
 391              prefix=%s, base_dir=%s,
 392              verbose=%s, dry_run=0,
 393              direct=1)
 394 """ % (`optimize`, `force`, `prefix`, `base_dir`, `verbose`))
 395
 396             script.close()
 397
 398         cmd = [sys.executable, script_name]
 399         if optimize == 1:
 400             cmd.insert(1, "-O")
 401         elif optimize == 2:
 402             cmd.insert(1, "-OO")
 403         spawn(cmd, verbose=verbose, dry_run=dry_run)
 404         execute(os.remove, (script_name,), "removing %s" % script_name,
 405                 verbose=verbose, dry_run=dry_run)
 406
 407     # "Direct" byte-compilation: use the py_compile module to compile
 408     # right here, right now.  Note that the script generated in indirect
 409     # mode simply calls 'byte_compile()' in direct mode, a weird sort of
 410     # cross-process recursion.  Hey, it works!
 411     else:
 412         from py_compile import compile
 413
 414         for file in py_files:
 415             if file[-3:] != ".py":
 416                 # This lets us be lazy and not filter filenames in
 417                 # the "install_lib" command.
 418                 continue
 419
 420             # Terminology from the py_compile module:
 421             #   cfile - byte-compiled file
 422             #   dfile - purported source filename (same as 'file' by default)
 423             cfile = file + (__debug__ and "c" or "o")
 424             dfile = file
 425             if prefix:
 426                 if file[:len(prefix)] != prefix:
 427                     raise ValueError, \
 428                           ("invalid prefix: filename %s doesn't start with %s"
 429                            % (`file`, `prefix`))
 430                 dfile = dfile[len(prefix):]
 431             if base_dir:
 432                 dfile = os.path.join(base_dir, dfile)
 433
 434             cfile_base = os.path.basename(cfile)
 435             if direct:
 436                 if force or newer(file, cfile):
 437                     if verbose:
 438                         print "byte-compiling %s to %s" % (file, cfile_base)
 439                     if not dry_run:
 440                         compile(file, cfile, dfile)
 441                 else:
 442                     if verbose:
 443                         print "skipping byte-compilation of %s to %s" % \
 444                               (file, cfile_base)
 445
 446 # byte_compile ()
 447
 448 def rfc822_escape (header):
 449     """Return a version of the string escaped for inclusion in an
 450     RFC-822 header, by ensuring there are 8 spaces space after each newline.
 451     """
 452     lines = string.split(header, '\n')
 453     lines = map(string.strip, lines)
 454     header = string.join(lines, '\n' + 8*' ')
 455     return header