Lib/string.py

   1 """A collection of string operations (most are no longer used in Python 1.6).
   2
   3 Warning: most of the code you see here isn't normally used nowadays.  With
   4 Python 1.6, many of these functions are implemented as methods on the
   5 standard string object. They used to be implemented by a built-in module
   6 called strop, but strop is now obsolete itself.
   7
   8 Public module variables:
   9
  10 whitespace -- a string containing all characters considered whitespace
  11 lowercase -- a string containing all characters considered lowercase letters
  12 uppercase -- a string containing all characters considered uppercase letters
  13 letters -- a string containing all characters considered letters
  14 digits -- a string containing all characters considered decimal digits
  15 hexdigits -- a string containing all characters considered hexadecimal digits
  16 octdigits -- a string containing all characters considered octal digits
  17 punctuation -- a string containing all characters considered punctuation
  18 printable -- a string containing all characters considered printable
  19
  20 """
  21
  22 # Some strings for ctype-style character classification
  23 whitespace = ' \t\n\r\v\f'
  24 lowercase = 'abcdefghijklmnopqrstuvwxyz'
  25 uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  26 letters = lowercase + uppercase
  27 ascii_lowercase = lowercase
  28 ascii_uppercase = uppercase
  29 ascii_letters = ascii_lowercase + ascii_uppercase
  30 digits = '0123456789'
  31 hexdigits = digits + 'abcdef' + 'ABCDEF'
  32 octdigits = '01234567'
  33 punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
  34 printable = digits + letters + punctuation + whitespace
  35
  36 # Case conversion helpers
  37 _idmap = ''
  38 for i in range(256): _idmap = _idmap + chr(i)
  39 del i
  40
  41 # Backward compatible names for exceptions
  42 index_error = ValueError
  43 atoi_error = ValueError
  44 atof_error = ValueError
  45 atol_error = ValueError
  46
  47 # convert UPPER CASE letters to lower case
  48 def lower(s):
  49     """lower(s) -> string
  50
  51     Return a copy of the string s converted to lowercase.
  52
  53     """
  54     return s.lower()
  55
  56 # Convert lower case letters to UPPER CASE
  57 def upper(s):
  58     """upper(s) -> string
  59
  60     Return a copy of the string s converted to uppercase.
  61
  62     """
  63     return s.upper()
  64
  65 # Swap lower case letters and UPPER CASE
  66 def swapcase(s):
  67     """swapcase(s) -> string
  68
  69     Return a copy of the string s with upper case characters
  70     converted to lowercase and vice versa.
  71
  72     """
  73     return s.swapcase()
  74
  75 # Strip leading and trailing tabs and spaces
  76 def strip(s):
  77     """strip(s) -> string
  78
  79     Return a copy of the string s with leading and trailing
  80     whitespace removed.
  81
  82     """
  83     return s.strip()
  84
  85 # Strip leading tabs and spaces
  86 def lstrip(s):
  87     """lstrip(s) -> string
  88
  89     Return a copy of the string s with leading whitespace removed.
  90
  91     """
  92     return s.lstrip()
  93
  94 # Strip trailing tabs and spaces
  95 def rstrip(s):
  96     """rstrip(s) -> string
  97
  98     Return a copy of the string s with trailing whitespace
  99     removed.
 100
 101     """
 102     return s.rstrip()
 103
 104
 105 # Split a string into a list of space/tab-separated words
 106 def split(s, sep=None, maxsplit=-1):
 107     """split(s [,sep [,maxsplit]]) -> list of strings
 108
 109     Return a list of the words in the string s, using sep as the
 110     delimiter string.  If maxsplit is given, splits into at most
 111     maxsplit words.  If sep is not specified, any whitespace string
 112     is a separator.
 113
 114     (split and splitfields are synonymous)
 115
 116     """
 117     return s.split(sep, maxsplit)
 118 splitfields = split
 119
 120 # Join fields with optional separator
 121 def join(words, sep = ' '):
 122     """join(list [,sep]) -> string
 123
 124     Return a string composed of the words in list, with
 125     intervening occurrences of sep.  The default separator is a
 126     single space.
 127
 128     (joinfields and join are synonymous)
 129
 130     """
 131     return sep.join(words)
 132 joinfields = join
 133
 134 # Find substring, raise exception if not found
 135 def index(s, *args):
 136     """index(s, sub [,start [,end]]) -> int
 137
 138     Like find but raises ValueError when the substring is not found.
 139
 140     """
 141     return s.index(*args)
 142
 143 # Find last substring, raise exception if not found
 144 def rindex(s, *args):
 145     """rindex(s, sub [,start [,end]]) -> int
 146
 147     Like rfind but raises ValueError when the substring is not found.
 148
 149     """
 150     return s.rindex(*args)
 151
 152 # Count non-overlapping occurrences of substring
 153 def count(s, *args):
 154     """count(s, sub[, start[,end]]) -> int
 155
 156     Return the number of occurrences of substring sub in string
 157     s[start:end].  Optional arguments start and end are
 158     interpreted as in slice notation.
 159
 160     """
 161     return s.count(*args)
 162
 163 # Find substring, return -1 if not found
 164 def find(s, *args):
 165     """find(s, sub [,start [,end]]) -> in
 166
 167     Return the lowest index in s where substring sub is found,
 168     such that sub is contained within s[start,end].  Optional
 169     arguments start and end are interpreted as in slice notation.
 170
 171     Return -1 on failure.
 172
 173     """
 174     return s.find(*args)
 175
 176 # Find last substring, return -1 if not found
 177 def rfind(s, *args):
 178     """rfind(s, sub [,start [,end]]) -> int
 179
 180     Return the highest index in s where substring sub is found,
 181     such that sub is contained within s[start,end].  Optional
 182     arguments start and end are interpreted as in slice notation.
 183
 184     Return -1 on failure.
 185
 186     """
 187     return s.rfind(*args)
 188
 189 # for a bit of speed
 190 _float = float
 191 _int = int
 192 _long = long
 193 _StringType = type('')
 194
 195 # Convert string to float
 196 def atof(s):
 197     """atof(s) -> float
 198
 199     Return the floating point number represented by the string s.
 200
 201     """
 202     return _float(s)
 203
 204
 205 # Convert string to integer
 206 def atoi(s , base=10):
 207     """atoi(s [,base]) -> int
 208
 209     Return the integer represented by the string s in the given
 210     base, which defaults to 10.  The string s must consist of one
 211     or more digits, possibly preceded by a sign.  If base is 0, it
 212     is chosen from the leading characters of s, 0 for octal, 0x or
 213     0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
 214     accepted.
 215
 216     """
 217     return _int(s, base)
 218
 219
 220 # Convert string to long integer
 221 def atol(s, base=10):
 222     """atol(s [,base]) -> long
 223
 224     Return the long integer represented by the string s in the
 225     given base, which defaults to 10.  The string s must consist
 226     of one or more digits, possibly preceded by a sign.  If base
 227     is 0, it is chosen from the leading characters of s, 0 for
 228     octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
 229     0x or 0X is accepted.  A trailing L or l is not accepted,
 230     unless base is 0.
 231
 232     """
 233     return _long(s, base)
 234
 235
 236 # Left-justify a string
 237 def ljust(s, width):
 238     """ljust(s, width) -> string
 239
 240     Return a left-justified version of s, in a field of the
 241     specified width, padded with spaces as needed.  The string is
 242     never truncated.
 243
 244     """
 245     return s.ljust(width)
 246
 247 # Right-justify a string
 248 def rjust(s, width):
 249     """rjust(s, width) -> string
 250
 251     Return a right-justified version of s, in a field of the
 252     specified width, padded with spaces as needed.  The string is
 253     never truncated.
 254
 255     """
 256     return s.rjust(width)
 257
 258 # Center a string
 259 def center(s, width):
 260     """center(s, width) -> string
 261
 262     Return a center version of s, in a field of the specified
 263     width. padded with spaces as needed.  The string is never
 264     truncated.
 265
 266     """
 267     return s.center(width)
 268
 269 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
 270 # Decadent feature: the argument may be a string or a number
 271 # (Use of this is deprecated; it should be a string as with ljust c.s.)
 272 def zfill(x, width):
 273     """zfill(x, width) -> string
 274
 275     Pad a numeric string x with zeros on the left, to fill a field
 276     of the specified width.  The string x is never truncated.
 277
 278     """
 279     if type(x) == type(''): s = x
 280     else: s = `x`
 281     n = len(s)
 282     if n >= width: return s
 283     sign = ''
 284     if s[0] in ('-', '+'):
 285         sign, s = s[0], s[1:]
 286     return sign + '0'*(width-n) + s
 287
 288 # Expand tabs in a string.
 289 # Doesn't take non-printing chars into account, but does understand \n.
 290 def expandtabs(s, tabsize=8):
 291     """expandtabs(s [,tabsize]) -> string
 292
 293     Return a copy of the string s with all tab characters replaced
 294     by the appropriate number of spaces, depending on the current
 295     column, and the tabsize (default 8).
 296
 297     """
 298     return s.expandtabs(tabsize)
 299
 300 # Character translation through look-up table.
 301 def translate(s, table, deletions=""):
 302     """translate(s,table [,deletions]) -> string
 303
 304     Return a copy of the string s, where all characters occurring
 305     in the optional argument deletions are removed, and the
 306     remaining characters have been mapped through the given
 307     translation table, which must be a string of length 256.  The
 308     deletions argument is not allowed for Unicode strings.
 309
 310     """
 311     if deletions:
 312         return s.translate(table, deletions)
 313     else:
 314         # Add s[:0] so that if s is Unicode and table is an 8-bit string,
 315         # table is converted to Unicode.  This means that table *cannot*
 316         # be a dictionary -- for that feature, use u.translate() directly.
 317         return s.translate(table + s[:0])
 318
 319 # Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
 320 def capitalize(s):
 321     """capitalize(s) -> string
 322
 323     Return a copy of the string s with only its first character
 324     capitalized.
 325
 326     """
 327     return s.capitalize()
 328
 329 # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
 330 # See also regsub.capwords().
 331 def capwords(s, sep=None):
 332     """capwords(s, [sep]) -> string
 333
 334     Split the argument into words using split, capitalize each
 335     word using capitalize, and join the capitalized words using
 336     join. Note that this replaces runs of whitespace characters by
 337     a single space.
 338
 339     """
 340     return join(map(capitalize, s.split(sep)), sep or ' ')
 341
 342 # Construct a translation string
 343 _idmapL = None
 344 def maketrans(fromstr, tostr):
 345     """maketrans(frm, to) -> string
 346
 347     Return a translation table (a string of 256 bytes long)
 348     suitable for use in string.translate.  The strings frm and to
 349     must be of the same length.
 350
 351     """
 352     if len(fromstr) != len(tostr):
 353         raise ValueError, "maketrans arguments must have same length"
 354     global _idmapL
 355     if not _idmapL:
 356         _idmapL = map(None, _idmap)
 357     L = _idmapL[:]
 358     fromstr = map(ord, fromstr)
 359     for i in range(len(fromstr)):
 360         L[fromstr[i]] = tostr[i]
 361     return join(L, "")
 362
 363 # Substring replacement (global)
 364 def replace(s, old, new, maxsplit=-1):
 365     """replace (str, old, new[, maxsplit]) -> string
 366
 367     Return a copy of string str with all occurrences of substring
 368     old replaced by new. If the optional argument maxsplit is
 369     given, only the first maxsplit occurrences are replaced.
 370
 371     """
 372     return s.replace(old, new, maxsplit)
 373
 374
 375 # Try importing optional built-in module "strop" -- if it exists,
 376 # it redefines some string operations that are 100-1000 times faster.
 377 # It also defines values for whitespace, lowercase and uppercase
 378 # that match <ctype.h>'s definitions.
 379
 380 try:
 381     from strop import maketrans, lowercase, uppercase, whitespace
 382     letters = lowercase + uppercase
 383 except ImportError:
 384     pass                                          # Use the original versions