Lib/distutils/util.py

   1 """distutils.util
   2
   3 Miscellaneous utility functions -- anything that doesn't fit into
   4 one of the other *util.py modules.
   5 """
   6
   7 # created 1999/03/08, Greg Ward
   8
   9 __revision__ = "$Id$"
  10
  11 import sys, os, string, re
  12 from distutils.errors import DistutilsPlatformError
  13 from distutils.dep_util import newer
  14 from distutils.spawn import spawn
  15 from distutils import log
  16
  17 def get_platform ():
  18     """Return a string that identifies the current platform.  This is used
  19     mainly to distinguish platform-specific build directories and
  20     platform-specific built distributions.  Typically includes the OS name
  21     and version and the architecture (as supplied by 'os.uname()'),
  22     although the exact information included depends on the OS; eg. for IRIX
  23     the architecture isn't particularly important (IRIX only runs on SGI
  24     hardware), but for Linux the kernel version isn't particularly
  25     important.
  26
  27     Examples of returned values:
  28        linux-i586
  29        linux-alpha (?)
  30        solaris-2.6-sun4u
  31        irix-5.3
  32        irix64-6.2
  33
  34     For non-POSIX platforms, currently just returns 'sys.platform'.
  35     """
  36     if os.name != "posix" or not hasattr(os, 'uname'):
  37         # XXX what about the architecture? NT is Intel or Alpha,
  38         # Mac OS is M68k or PPC, etc.
  39         return sys.platform
  40
  41     # Try to distinguish various flavours of Unix
  42
  43     (osname, host, release, version, machine) = os.uname()
  44
  45     # Convert the OS name to lowercase and remove '/' characters
  46     # (to accommodate BSD/OS)
  47     osname = string.lower(osname)
  48     osname = string.replace(osname, '/', '')
  49
  50     if osname[:5] == "linux":
  51         # At least on Linux/Intel, 'machine' is the processor --
  52         # i386, etc.
  53         # XXX what about Alpha, SPARC, etc?
  54         return  "%s-%s" % (osname, machine)
  55     elif osname[:5] == "sunos":
  56         if release[0] >= "5":           # SunOS 5 == Solaris 2
  57             osname = "solaris"
  58             release = "%d.%s" % (int(release[0]) - 3, release[2:])
  59         # fall through to standard osname-release-machine representation
  60     elif osname[:4] == "irix":              # could be "irix64"!
  61         return "%s-%s" % (osname, release)
  62     elif osname[:3] == "aix":
  63         return "%s-%s.%s" % (osname, version, release)
  64     elif osname[:6] == "cygwin":
  65         osname = "cygwin"
  66         rel_re = re.compile (r'[\d.]+')
  67         m = rel_re.match(release)
  68         if m:
  69             release = m.group()
  70
  71     return "%s-%s-%s" % (osname, release, machine)
  72
  73 # get_platform ()
  74
  75
  76 def convert_path (pathname):
  77     """Return 'pathname' as a name that will work on the native filesystem,
  78     i.e. split it on '/' and put it back together again using the current
  79     directory separator.  Needed because filenames in the setup script are
  80     always supplied in Unix style, and have to be converted to the local
  81     convention before we can actually use them in the filesystem.  Raises
  82     ValueError on non-Unix-ish systems if 'pathname' either starts or
  83     ends with a slash.
  84     """
  85     if os.sep == '/':
  86         return pathname
  87     if pathname and pathname[0] == '/':
  88         raise ValueError, "path '%s' cannot be absolute" % pathname
  89     if pathname and pathname[-1] == '/':
  90         raise ValueError, "path '%s' cannot end with '/'" % pathname
  91
  92     paths = string.split(pathname, '/')
  93     while '.' in paths:
  94         paths.remove('.')
  95     if not paths:
  96         return os.curdir
  97     return apply(os.path.join, paths)
  98
  99 # convert_path ()
 100
 101
 102 def change_root (new_root, pathname):
 103     """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
 104     relative, this is equivalent to "os.path.join(new_root,pathname)".
 105     Otherwise, it requires making 'pathname' relative and then joining the
 106     two, which is tricky on DOS/Windows and Mac OS.
 107     """
 108     if os.name == 'posix':
 109         if not os.path.isabs(pathname):
 110             return os.path.join(new_root, pathname)
 111         else:
 112             return os.path.join(new_root, pathname[1:])
 113
 114     elif os.name == 'nt':
 115         (drive, path) = os.path.splitdrive(pathname)
 116         if path[0] == '\\':
 117             path = path[1:]
 118         return os.path.join(new_root, path)
 119
 120     elif os.name == 'os2':
 121         (drive, path) = os.path.splitdrive(pathname)
 122         if path[0] == os.sep:
 123             path = path[1:]
 124         return os.path.join(new_root, path)
 125
 126     elif os.name == 'mac':
 127         if not os.path.isabs(pathname):
 128             return os.path.join(new_root, pathname)
 129         else:
 130             # Chop off volume name from start of path
 131             elements = string.split(pathname, ":", 1)
 132             pathname = ":" + elements[1]
 133             return os.path.join(new_root, pathname)
 134
 135     else:
 136         raise DistutilsPlatformError, \
 137               "nothing known about platform '%s'" % os.name
 138
 139
 140 _environ_checked = 0
 141 def check_environ ():
 142     """Ensure that 'os.environ' has all the environment variables we
 143     guarantee that users can use in config files, command-line options,
 144     etc.  Currently this includes:
 145       HOME - user's home directory (Unix only)
 146       PLAT - description of the current platform, including hardware
 147              and OS (see 'get_platform()')
 148     """
 149     global _environ_checked
 150     if _environ_checked:
 151         return
 152
 153     if os.name == 'posix' and not os.environ.has_key('HOME'):
 154         import pwd
 155         os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
 156
 157     if not os.environ.has_key('PLAT'):
 158         os.environ['PLAT'] = get_platform()
 159
 160     _environ_checked = 1
 161
 162
 163 def subst_vars (s, local_vars):
 164     """Perform shell/Perl-style variable substitution on 'string'.  Every
 165     occurrence of '$' followed by a name is considered a variable, and
 166     variable is substituted by the value found in the 'local_vars'
 167     dictionary, or in 'os.environ' if it's not in 'local_vars'.
 168     'os.environ' is first checked/augmented to guarantee that it contains
 169     certain values: see 'check_environ()'.  Raise ValueError for any
 170     variables not found in either 'local_vars' or 'os.environ'.
 171     """
 172     check_environ()
 173     def _subst (match, local_vars=local_vars):
 174         var_name = match.group(1)
 175         if local_vars.has_key(var_name):
 176             return str(local_vars[var_name])
 177         else:
 178             return os.environ[var_name]
 179
 180     try:
 181         return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
 182     except KeyError, var:
 183         raise ValueError, "invalid variable '$%s'" % var
 184
 185 # subst_vars ()
 186
 187
 188 def grok_environment_error (exc, prefix="error: "):
 189     """Generate a useful error message from an EnvironmentError (IOError or
 190     OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
 191     does what it can to deal with exception objects that don't have a
 192     filename (which happens when the error is due to a two-file operation,
 193     such as 'rename()' or 'link()'.  Returns the error message as a string
 194     prefixed with 'prefix'.
 195     """
 196     # check for Python 1.5.2-style {IO,OS}Error exception objects
 197     if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
 198         if exc.filename:
 199             error = prefix + "%s: %s" % (exc.filename, exc.strerror)
 200         else:
 201             # two-argument functions in posix module don't
 202             # include the filename in the exception object!
 203             error = prefix + "%s" % exc.strerror
 204     else:
 205         error = prefix + str(exc[-1])
 206
 207     return error
 208
 209
 210 # Needed by 'split_quoted()'
 211 _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
 212 _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
 213 _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
 214
 215 def split_quoted (s):
 216     """Split a string up according to Unix shell-like rules for quotes and
 217     backslashes.  In short: words are delimited by spaces, as long as those
 218     spaces are not escaped by a backslash, or inside a quoted string.
 219     Single and double quotes are equivalent, and the quote characters can
 220     be backslash-escaped.  The backslash is stripped from any two-character
 221     escape sequence, leaving only the escaped character.  The quote
 222     characters are stripped from any quoted string.  Returns a list of
 223     words.
 224     """
 225
 226     # This is a nice algorithm for splitting up a single string, since it
 227     # doesn't require character-by-character examination.  It was a little
 228     # bit of a brain-bender to get it working right, though...
 229
 230     s = string.strip(s)
 231     words = []
 232     pos = 0
 233
 234     while s:
 235         m = _wordchars_re.match(s, pos)
 236         end = m.end()
 237         if end == len(s):
 238             words.append(s[:end])
 239             break
 240
 241         if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
 242             words.append(s[:end])       # we definitely have a word delimiter
 243             s = string.lstrip(s[end:])
 244             pos = 0
 245
 246         elif s[end] == '\\':            # preserve whatever is being escaped;
 247                                         # will become part of the current word
 248             s = s[:end] + s[end+1:]
 249             pos = end+1
 250
 251         else:
 252             if s[end] == "'":           # slurp singly-quoted string
 253                 m = _squote_re.match(s, end)
 254             elif s[end] == '"':         # slurp doubly-quoted string
 255                 m = _dquote_re.match(s, end)
 256             else:
 257                 raise RuntimeError, \
 258                       "this can't happen (bad char '%c')" % s[end]
 259
 260             if m is None:
 261                 raise ValueError, \
 262                       "bad string (mismatched %s quotes?)" % s[end]
 263
 264             (beg, end) = m.span()
 265             s = s[:beg] + s[beg+1:end-1] + s[end:]
 266             pos = m.end() - 2
 267
 268         if pos >= len(s):
 269             words.append(s)
 270             break
 271
 272     return words
 273
 274 # split_quoted ()
 275
 276
 277 def execute (func, args, msg=None, verbose=0, dry_run=0):
 278     """Perform some action that affects the outside world (eg.  by
 279     writing to the filesystem).  Such actions are special because they
 280     are disabled by the 'dry_run' flag.  This method takes care of all
 281     that bureaucracy for you; all you have to do is supply the
 282     function to call and an argument tuple for it (to embody the
 283     "external action" being performed), and an optional message to
 284     print.
 285     """
 286     if msg is None:
 287         msg = "%s%s" % (func.__name__, `args`)
 288         if msg[-2:] == ',)':        # correct for singleton tuple
 289             msg = msg[0:-2] + ')'
 290
 291     log.info(msg)
 292     if not dry_run:
 293         apply(func, args)
 294
 295
 296 def strtobool (val):
 297     """Convert a string representation of truth to true (1) or false (0).
 298
 299     True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
 300     are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
 301     'val' is anything else.
 302     """
 303     val = string.lower(val)
 304     if val in ('y', 'yes', 't', 'true', 'on', '1'):
 305         return 1
 306     elif val in ('n', 'no', 'f', 'false', 'off', '0'):
 307         return 0
 308     else:
 309         raise ValueError, "invalid truth value %s" % `val`
 310
 311
 312 def byte_compile (py_files,
 313                   optimize=0, force=0,
 314                   prefix=None, base_dir=None,
 315                   verbose=1, dry_run=0,
 316                   direct=None):
 317     """Byte-compile a collection of Python source files to either .pyc
 318     or .pyo files in the same directory.  'py_files' is a list of files
 319     to compile; any files that don't end in ".py" are silently skipped.
 320     'optimize' must be one of the following:
 321       0 - don't optimize (generate .pyc)
 322       1 - normal optimization (like "python -O")
 323       2 - extra optimization (like "python -OO")
 324     If 'force' is true, all files are recompiled regardless of
 325     timestamps.
 326
 327     The source filename encoded in each bytecode file defaults to the
 328     filenames listed in 'py_files'; you can modify these with 'prefix' and
 329     'basedir'.  'prefix' is a string that will be stripped off of each
 330     source filename, and 'base_dir' is a directory name that will be
 331     prepended (after 'prefix' is stripped).  You can supply either or both
 332     (or neither) of 'prefix' and 'base_dir', as you wish.
 333
 334     If 'dry_run' is true, doesn't actually do anything that would
 335     affect the filesystem.
 336
 337     Byte-compilation is either done directly in this interpreter process
 338     with the standard py_compile module, or indirectly by writing a
 339     temporary script and executing it.  Normally, you should let
 340     'byte_compile()' figure out to use direct compilation or not (see
 341     the source for details).  The 'direct' flag is used by the script
 342     generated in indirect mode; unless you know what you're doing, leave
 343     it set to None.
 344     """
 345
 346     # First, if the caller didn't force us into direct or indirect mode,
 347     # figure out which mode we should be in.  We take a conservative
 348     # approach: choose direct mode *only* if the current interpreter is
 349     # in debug mode and optimize is 0.  If we're not in debug mode (-O
 350     # or -OO), we don't know which level of optimization this
 351     # interpreter is running with, so we can't do direct
 352     # byte-compilation and be certain that it's the right thing.  Thus,
 353     # always compile indirectly if the current interpreter is in either
 354     # optimize mode, or if either optimization level was requested by
 355     # the caller.
 356     if direct is None:
 357         direct = (__debug__ and optimize == 0)
 358
 359     # "Indirect" byte-compilation: write a temporary script and then
 360     # run it with the appropriate flags.
 361     if not direct:
 362         from tempfile import mktemp
 363         script_name = mktemp(".py")
 364         log.info("writing byte-compilation script '%s'", script_name)
 365         if not dry_run:
 366             script = open(script_name, "w")
 367
 368             script.write("""\
 369 from distutils.util import byte_compile
 370 files = [
 371 """)
 372
 373             # XXX would be nice to write absolute filenames, just for
 374             # safety's sake (script should be more robust in the face of
 375             # chdir'ing before running it).  But this requires abspath'ing
 376             # 'prefix' as well, and that breaks the hack in build_lib's
 377             # 'byte_compile()' method that carefully tacks on a trailing
 378             # slash (os.sep really) to make sure the prefix here is "just
 379             # right".  This whole prefix business is rather delicate -- the
 380             # problem is that it's really a directory, but I'm treating it
 381             # as a dumb string, so trailing slashes and so forth matter.
 382
 383             #py_files = map(os.path.abspath, py_files)
 384             #if prefix:
 385             #    prefix = os.path.abspath(prefix)
 386
 387             script.write(string.join(map(repr, py_files), ",\n") + "]\n")
 388             script.write("""
 389 byte_compile(files, optimize=%s, force=%s,
 390              prefix=%s, base_dir=%s,
 391              verbose=%s, dry_run=0,
 392              direct=1)
 393 """ % (`optimize`, `force`, `prefix`, `base_dir`, `verbose`))
 394
 395             script.close()
 396
 397         cmd = [sys.executable, script_name]
 398         if optimize == 1:
 399             cmd.insert(1, "-O")
 400         elif optimize == 2:
 401             cmd.insert(1, "-OO")
 402         spawn(cmd, dry_run=dry_run)
 403         execute(os.remove, (script_name,), "removing %s" % script_name,
 404                 dry_run=dry_run)
 405
 406     # "Direct" byte-compilation: use the py_compile module to compile
 407     # right here, right now.  Note that the script generated in indirect
 408     # mode simply calls 'byte_compile()' in direct mode, a weird sort of
 409     # cross-process recursion.  Hey, it works!
 410     else:
 411         from py_compile import compile
 412
 413         for file in py_files:
 414             if file[-3:] != ".py":
 415                 # This lets us be lazy and not filter filenames in
 416                 # the "install_lib" command.
 417                 continue
 418
 419             # Terminology from the py_compile module:
 420             #   cfile - byte-compiled file
 421             #   dfile - purported source filename (same as 'file' by default)
 422             cfile = file + (__debug__ and "c" or "o")
 423             dfile = file
 424             if prefix:
 425                 if file[:len(prefix)] != prefix:
 426                     raise ValueError, \
 427                           ("invalid prefix: filename %s doesn't start with %s"
 428                            % (`file`, `prefix`))
 429                 dfile = dfile[len(prefix):]
 430             if base_dir:
 431                 dfile = os.path.join(base_dir, dfile)
 432
 433             cfile_base = os.path.basename(cfile)
 434             if direct:
 435                 if force or newer(file, cfile):
 436                     log.info("byte-compiling %s to %s", file, cfile_base)
 437                     if not dry_run:
 438                         compile(file, cfile, dfile)
 439                 else:
 440                     log.debug("skipping byte-compilation of %s to %s",
 441                               file, cfile_base)
 442
 443 # byte_compile ()
 444
 445 def rfc822_escape (header):
 446     """Return a version of the string escaped for inclusion in an
 447     RFC-822 header, by ensuring there are 8 spaces space after each newline.
 448     """
 449     lines = string.split(header, '\n')
 450     lines = map(string.strip, lines)
 451     header = string.join(lines, '\n' + 8*' ')
 452     return header