Lib/distutils/util.py

   1 """distutils.util
   2
   3 Miscellaneous utility functions -- anything that doesn't fit into
   4 one of the other *util.py modules.
   5 """
   6
   7 # created 1999/03/08, Greg Ward
   8
   9 __revision__ = "$Id$"
  10
  11 import sys, os, string, re
  12 from distutils.errors import DistutilsPlatformError
  13 from distutils.dep_util import newer
  14 from distutils.spawn import spawn
  15
  16
  17 def get_platform ():
  18     """Return a string that identifies the current platform.  This is used
  19     mainly to distinguish platform-specific build directories and
  20     platform-specific built distributions.  Typically includes the OS name
  21     and version and the architecture (as supplied by 'os.uname()'),
  22     although the exact information included depends on the OS; eg. for IRIX
  23     the architecture isn't particularly important (IRIX only runs on SGI
  24     hardware), but for Linux the kernel version isn't particularly
  25     important.
  26
  27     Examples of returned values:
  28        linux-i586
  29        linux-alpha (?)
  30        solaris-2.6-sun4u
  31        irix-5.3
  32        irix64-6.2
  33
  34     For non-POSIX platforms, currently just returns 'sys.platform'.
  35     """
  36     if os.name != "posix" or not hasattr(os, 'uname'):
  37         # XXX what about the architecture? NT is Intel or Alpha,
  38         # Mac OS is M68k or PPC, etc.
  39         return sys.platform
  40
  41     # Try to distinguish various flavours of Unix
  42
  43     (osname, host, release, version, machine) = os.uname()
  44
  45     # Convert the OS name to lowercase and remove '/' characters
  46     # (to accommodate BSD/OS)
  47     osname = string.lower(osname)
  48     osname = string.replace(osname, '/', '')
  49
  50     if osname[:5] == "linux":
  51         # At least on Linux/Intel, 'machine' is the processor --
  52         # i386, etc.
  53         # XXX what about Alpha, SPARC, etc?
  54         return  "%s-%s" % (osname, machine)
  55     elif osname[:5] == "sunos":
  56         if release[0] >= "5":           # SunOS 5 == Solaris 2
  57             osname = "solaris"
  58             release = "%d.%s" % (int(release[0]) - 3, release[2:])
  59         # fall through to standard osname-release-machine representation
  60     elif osname[:4] == "irix":              # could be "irix64"!
  61         return "%s-%s" % (osname, release)
  62     elif osname[:3] == "aix":
  63         return "%s-%s.%s" % (osname, version, release)
  64     elif osname[:6] == "cygwin":
  65         rel_re = re.compile (r'[\d.]+')
  66         m = rel_re.match(release)
  67         if m:
  68             release = m.group()
  69
  70     return "%s-%s-%s" % (osname, release, machine)
  71
  72 # get_platform ()
  73
  74
  75 def convert_path (pathname):
  76     """Return 'pathname' as a name that will work on the native filesystem,
  77     i.e. split it on '/' and put it back together again using the current
  78     directory separator.  Needed because filenames in the setup script are
  79     always supplied in Unix style, and have to be converted to the local
  80     convention before we can actually use them in the filesystem.  Raises
  81     ValueError on non-Unix-ish systems if 'pathname' either starts or
  82     ends with a slash.
  83     """
  84     if os.sep == '/':
  85         return pathname
  86     if pathname[0] == '/':
  87         raise ValueError, "path '%s' cannot be absolute" % pathname
  88     if pathname[-1] == '/':
  89         raise ValueError, "path '%s' cannot end with '/'" % pathname
  90
  91     paths = string.split(pathname, '/')
  92     while '.' in paths:
  93         paths.remove('.')
  94     if not paths:
  95         return os.curdir
  96     return apply(os.path.join, paths)
  97
  98 # convert_path ()
  99
 100
 101 def change_root (new_root, pathname):
 102     """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
 103     relative, this is equivalent to "os.path.join(new_root,pathname)".
 104     Otherwise, it requires making 'pathname' relative and then joining the
 105     two, which is tricky on DOS/Windows and Mac OS.
 106     """
 107     if os.name == 'posix':
 108         if not os.path.isabs(pathname):
 109             return os.path.join(new_root, pathname)
 110         else:
 111             return os.path.join(new_root, pathname[1:])
 112
 113     elif os.name == 'nt':
 114         (drive, path) = os.path.splitdrive(pathname)
 115         if path[0] == '\\':
 116             path = path[1:]
 117         return os.path.join(new_root, path)
 118
 119     elif os.name == 'mac':
 120         if not os.path.isabs(pathname):
 121             return os.path.join(new_root, pathname)
 122         else:
 123             # Chop off volume name from start of path
 124             elements = string.split(pathname, ":", 1)
 125             pathname = ":" + elements[1]
 126             return os.path.join(new_root, pathname)
 127
 128     else:
 129         raise DistutilsPlatformError, \
 130               "nothing known about platform '%s'" % os.name
 131
 132
 133 _environ_checked = 0
 134 def check_environ ():
 135     """Ensure that 'os.environ' has all the environment variables we
 136     guarantee that users can use in config files, command-line options,
 137     etc.  Currently this includes:
 138       HOME - user's home directory (Unix only)
 139       PLAT - description of the current platform, including hardware
 140              and OS (see 'get_platform()')
 141     """
 142     global _environ_checked
 143     if _environ_checked:
 144         return
 145
 146     if os.name == 'posix' and not os.environ.has_key('HOME'):
 147         import pwd
 148         os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
 149
 150     if not os.environ.has_key('PLAT'):
 151         os.environ['PLAT'] = get_platform()
 152
 153     _environ_checked = 1
 154
 155
 156 def subst_vars (s, local_vars):
 157     """Perform shell/Perl-style variable substitution on 'string'.  Every
 158     occurrence of '$' followed by a name is considered a variable, and
 159     variable is substituted by the value found in the 'local_vars'
 160     dictionary, or in 'os.environ' if it's not in 'local_vars'.
 161     'os.environ' is first checked/augmented to guarantee that it contains
 162     certain values: see 'check_environ()'.  Raise ValueError for any
 163     variables not found in either 'local_vars' or 'os.environ'.
 164     """
 165     check_environ()
 166     def _subst (match, local_vars=local_vars):
 167         var_name = match.group(1)
 168         if local_vars.has_key(var_name):
 169             return str(local_vars[var_name])
 170         else:
 171             return os.environ[var_name]
 172
 173     try:
 174         return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
 175     except KeyError, var:
 176         raise ValueError, "invalid variable '$%s'" % var
 177
 178 # subst_vars ()
 179
 180
 181 def grok_environment_error (exc, prefix="error: "):
 182     """Generate a useful error message from an EnvironmentError (IOError or
 183     OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
 184     does what it can to deal with exception objects that don't have a
 185     filename (which happens when the error is due to a two-file operation,
 186     such as 'rename()' or 'link()'.  Returns the error message as a string
 187     prefixed with 'prefix'.
 188     """
 189     # check for Python 1.5.2-style {IO,OS}Error exception objects
 190     if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
 191         if exc.filename:
 192             error = prefix + "%s: %s" % (exc.filename, exc.strerror)
 193         else:
 194             # two-argument functions in posix module don't
 195             # include the filename in the exception object!
 196             error = prefix + "%s" % exc.strerror
 197     else:
 198         error = prefix + str(exc[-1])
 199
 200     return error
 201
 202
 203 # Needed by 'split_quoted()'
 204 _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
 205 _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
 206 _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
 207
 208 def split_quoted (s):
 209     """Split a string up according to Unix shell-like rules for quotes and
 210     backslashes.  In short: words are delimited by spaces, as long as those
 211     spaces are not escaped by a backslash, or inside a quoted string.
 212     Single and double quotes are equivalent, and the quote characters can
 213     be backslash-escaped.  The backslash is stripped from any two-character
 214     escape sequence, leaving only the escaped character.  The quote
 215     characters are stripped from any quoted string.  Returns a list of
 216     words.
 217     """
 218
 219     # This is a nice algorithm for splitting up a single string, since it
 220     # doesn't require character-by-character examination.  It was a little
 221     # bit of a brain-bender to get it working right, though...
 222
 223     s = string.strip(s)
 224     words = []
 225     pos = 0
 226
 227     while s:
 228         m = _wordchars_re.match(s, pos)
 229         end = m.end()
 230         if end == len(s):
 231             words.append(s[:end])
 232             break
 233
 234         if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
 235             words.append(s[:end])       # we definitely have a word delimiter
 236             s = string.lstrip(s[end:])
 237             pos = 0
 238
 239         elif s[end] == '\\':            # preserve whatever is being escaped;
 240                                         # will become part of the current word
 241             s = s[:end] + s[end+1:]
 242             pos = end+1
 243
 244         else:
 245             if s[end] == "'":           # slurp singly-quoted string
 246                 m = _squote_re.match(s, end)
 247             elif s[end] == '"':         # slurp doubly-quoted string
 248                 m = _dquote_re.match(s, end)
 249             else:
 250                 raise RuntimeError, \
 251                       "this can't happen (bad char '%c')" % s[end]
 252
 253             if m is None:
 254                 raise ValueError, \
 255                       "bad string (mismatched %s quotes?)" % s[end]
 256
 257             (beg, end) = m.span()
 258             s = s[:beg] + s[beg+1:end-1] + s[end:]
 259             pos = m.end() - 2
 260
 261         if pos >= len(s):
 262             words.append(s)
 263             break
 264
 265     return words
 266
 267 # split_quoted ()
 268
 269
 270 def execute (func, args, msg=None, verbose=0, dry_run=0):
 271     """Perform some action that affects the outside world (eg.  by writing
 272     to the filesystem).  Such actions are special because they are disabled
 273     by the 'dry_run' flag, and announce themselves if 'verbose' is true.
 274     This method takes care of all that bureaucracy for you; all you have to
 275     do is supply the function to call and an argument tuple for it (to
 276     embody the "external action" being performed), and an optional message
 277     to print.
 278     """
 279     # Generate a message if we weren't passed one
 280     if msg is None:
 281         msg = "%s%s" % (func.__name__, `args`)
 282         if msg[-2:] == ',)':        # correct for singleton tuple
 283             msg = msg[0:-2] + ')'
 284
 285     # Print it if verbosity level is high enough
 286     if verbose:
 287         print msg
 288
 289     # And do it, as long as we're not in dry-run mode
 290     if not dry_run:
 291         apply(func, args)
 292
 293 # execute()
 294
 295
 296 def strtobool (val):
 297     """Convert a string representation of truth to true (1) or false (0).
 298     True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
 299     are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
 300     'val' is anything else.
 301     """
 302     val = string.lower(val)
 303     if val in ('y', 'yes', 't', 'true', 'on', '1'):
 304         return 1
 305     elif val in ('n', 'no', 'f', 'false', 'off', '0'):
 306         return 0
 307     else:
 308         raise ValueError, "invalid truth value %s" % `val`
 309
 310
 311 def byte_compile (py_files,
 312                   optimize=0, force=0,
 313                   prefix=None, base_dir=None,
 314                   verbose=1, dry_run=0,
 315                   direct=None):
 316     """Byte-compile a collection of Python source files to either .pyc
 317     or .pyo files in the same directory.  'py_files' is a list of files
 318     to compile; any files that don't end in ".py" are silently skipped.
 319     'optimize' must be one of the following:
 320       0 - don't optimize (generate .pyc)
 321       1 - normal optimization (like "python -O")
 322       2 - extra optimization (like "python -OO")
 323     If 'force' is true, all files are recompiled regardless of
 324     timestamps.
 325
 326     The source filename encoded in each bytecode file defaults to the
 327     filenames listed in 'py_files'; you can modify these with 'prefix' and
 328     'basedir'.  'prefix' is a string that will be stripped off of each
 329     source filename, and 'base_dir' is a directory name that will be
 330     prepended (after 'prefix' is stripped).  You can supply either or both
 331     (or neither) of 'prefix' and 'base_dir', as you wish.
 332
 333     If 'verbose' is true, prints out a report of each file.  If 'dry_run'
 334     is true, doesn't actually do anything that would affect the filesystem.
 335
 336     Byte-compilation is either done directly in this interpreter process
 337     with the standard py_compile module, or indirectly by writing a
 338     temporary script and executing it.  Normally, you should let
 339     'byte_compile()' figure out to use direct compilation or not (see
 340     the source for details).  The 'direct' flag is used by the script
 341     generated in indirect mode; unless you know what you're doing, leave
 342     it set to None.
 343     """
 344
 345     # First, if the caller didn't force us into direct or indirect mode,
 346     # figure out which mode we should be in.  We take a conservative
 347     # approach: choose direct mode *only* if the current interpreter is
 348     # in debug mode and optimize is 0.  If we're not in debug mode (-O
 349     # or -OO), we don't know which level of optimization this
 350     # interpreter is running with, so we can't do direct
 351     # byte-compilation and be certain that it's the right thing.  Thus,
 352     # always compile indirectly if the current interpreter is in either
 353     # optimize mode, or if either optimization level was requested by
 354     # the caller.
 355     if direct is None:
 356         direct = (__debug__ and optimize == 0)
 357
 358     # "Indirect" byte-compilation: write a temporary script and then
 359     # run it with the appropriate flags.
 360     if not direct:
 361         from tempfile import mktemp
 362         script_name = mktemp(".py")
 363         if verbose:
 364             print "writing byte-compilation script '%s'" % script_name
 365         if not dry_run:
 366             script = open(script_name, "w")
 367
 368             script.write("""\
 369 from distutils.util import byte_compile
 370 files = [
 371 """)
 372
 373             # XXX would be nice to write absolute filenames, just for
 374             # safety's sake (script should be more robust in the face of
 375             # chdir'ing before running it).  But this requires abspath'ing
 376             # 'prefix' as well, and that breaks the hack in build_lib's
 377             # 'byte_compile()' method that carefully tacks on a trailing
 378             # slash (os.sep really) to make sure the prefix here is "just
 379             # right".  This whole prefix business is rather delicate -- the
 380             # problem is that it's really a directory, but I'm treating it
 381             # as a dumb string, so trailing slashes and so forth matter.
 382
 383             #py_files = map(os.path.abspath, py_files)
 384             #if prefix:
 385             #    prefix = os.path.abspath(prefix)
 386
 387             script.write(string.join(map(repr, py_files), ",\n") + "]\n")
 388             script.write("""
 389 byte_compile(files, optimize=%s, force=%s,
 390              prefix=%s, base_dir=%s,
 391              verbose=%s, dry_run=0,
 392              direct=1)
 393 """ % (`optimize`, `force`, `prefix`, `base_dir`, `verbose`))
 394
 395             script.close()
 396
 397         cmd = [sys.executable, script_name]
 398         if optimize == 1:
 399             cmd.insert(1, "-O")
 400         elif optimize == 2:
 401             cmd.insert(1, "-OO")
 402         spawn(cmd, verbose=verbose, dry_run=dry_run)
 403         execute(os.remove, (script_name,), "removing %s" % script_name,
 404                 verbose=verbose, dry_run=dry_run)
 405
 406     # "Direct" byte-compilation: use the py_compile module to compile
 407     # right here, right now.  Note that the script generated in indirect
 408     # mode simply calls 'byte_compile()' in direct mode, a weird sort of
 409     # cross-process recursion.  Hey, it works!
 410     else:
 411         from py_compile import compile
 412
 413         for file in py_files:
 414             if file[-3:] != ".py":
 415                 # This lets us be lazy and not filter filenames in
 416                 # the "install_lib" command.
 417                 continue
 418
 419             # Terminology from the py_compile module:
 420             #   cfile - byte-compiled file
 421             #   dfile - purported source filename (same as 'file' by default)
 422             cfile = file + (__debug__ and "c" or "o")
 423             dfile = file
 424             if prefix:
 425                 if file[:len(prefix)] != prefix:
 426                     raise ValueError, \
 427                           ("invalid prefix: filename %s doesn't start with %s"
 428                            % (`file`, `prefix`))
 429                 dfile = dfile[len(prefix):]
 430             if base_dir:
 431                 dfile = os.path.join(base_dir, dfile)
 432
 433             cfile_base = os.path.basename(cfile)
 434             if direct:
 435                 if force or newer(file, cfile):
 436                     if verbose:
 437                         print "byte-compiling %s to %s" % (file, cfile_base)
 438                     if not dry_run:
 439                         compile(file, cfile, dfile)
 440                 else:
 441                     if verbose:
 442                         print "skipping byte-compilation of %s to %s" % \
 443                               (file, cfile_base)
 444
 445 # byte_compile ()