Lib/string.py

   1 """A collection of string operations (most are no longer used in Python 1.6).
   2
   3 Warning: most of the code you see here isn't normally used nowadays.  With
   4 Python 1.6, many of these functions are implemented as methods on the
   5 standard string object. They used to be implemented by a built-in module
   6 called strop, but strop is now obsolete itself.
   7
   8 Public module variables:
   9
  10 whitespace -- a string containing all characters considered whitespace
  11 lowercase -- a string containing all characters considered lowercase letters
  12 uppercase -- a string containing all characters considered uppercase letters
  13 letters -- a string containing all characters considered letters
  14 digits -- a string containing all characters considered decimal digits
  15 hexdigits -- a string containing all characters considered hexadecimal digits
  16 octdigits -- a string containing all characters considered octal digits
  17 punctuation -- a string containing all characters considered punctuation
  18 printable -- a string containing all characters considered printable
  19
  20 """
  21
  22 # Some strings for ctype-style character classification
  23 whitespace = ' \t\n\r\v\f'
  24 lowercase = 'abcdefghijklmnopqrstuvwxyz'
  25 uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  26 letters = lowercase + uppercase
  27 ascii_lowercase = lowercase
  28 ascii_uppercase = uppercase
  29 ascii_letters = ascii_lowercase + ascii_uppercase
  30 digits = '0123456789'
  31 hexdigits = digits + 'abcdef' + 'ABCDEF'
  32 octdigits = '01234567'
  33 punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
  34 printable = digits + letters + punctuation + whitespace
  35
  36 # Case conversion helpers
  37 _idmap = ''
  38 for i in range(256): _idmap = _idmap + chr(i)
  39 del i
  40
  41 # Backward compatible names for exceptions
  42 index_error = ValueError
  43 atoi_error = ValueError
  44 atof_error = ValueError
  45 atol_error = ValueError
  46
  47 # convert UPPER CASE letters to lower case
  48 def lower(s):
  49     """lower(s) -> string
  50
  51     Return a copy of the string s converted to lowercase.
  52
  53     """
  54     return s.lower()
  55
  56 # Convert lower case letters to UPPER CASE
  57 def upper(s):
  58     """upper(s) -> string
  59
  60     Return a copy of the string s converted to uppercase.
  61
  62     """
  63     return s.upper()
  64
  65 # Swap lower case letters and UPPER CASE
  66 def swapcase(s):
  67     """swapcase(s) -> string
  68
  69     Return a copy of the string s with upper case characters
  70     converted to lowercase and vice versa.
  71
  72     """
  73     return s.swapcase()
  74
  75 # Strip leading and trailing tabs and spaces
  76 def strip(s):
  77     """strip(s) -> string
  78
  79     Return a copy of the string s with leading and trailing
  80     whitespace removed.
  81
  82     """
  83     return s.strip()
  84
  85 # Strip leading tabs and spaces
  86 def lstrip(s):
  87     """lstrip(s) -> string
  88
  89     Return a copy of the string s with leading whitespace removed.
  90
  91     """
  92     return s.lstrip()
  93
  94 # Strip trailing tabs and spaces
  95 def rstrip(s):
  96     """rstrip(s) -> string
  97
  98     Return a copy of the string s with trailing whitespace
  99     removed.
 100
 101     """
 102     return s.rstrip()
 103
 104
 105 # Split a string into a list of space/tab-separated words
 106 def split(s, sep=None, maxsplit=-1):
 107     """split(s [,sep [,maxsplit]]) -> list of strings
 108
 109     Return a list of the words in the string s, using sep as the
 110     delimiter string.  If maxsplit is given, splits at no more than
 111     maxsplit places (resulting in at most maxsplit+1 words).  If sep
 112     is not specified, any whitespace string is a separator.
 113
 114     (split and splitfields are synonymous)
 115
 116     """
 117     return s.split(sep, maxsplit)
 118 splitfields = split
 119
 120 # Join fields with optional separator
 121 def join(words, sep = ' '):
 122     """join(list [,sep]) -> string
 123
 124     Return a string composed of the words in list, with
 125     intervening occurrences of sep.  The default separator is a
 126     single space.
 127
 128     (joinfields and join are synonymous)
 129
 130     """
 131     return sep.join(words)
 132 joinfields = join
 133
 134 # Find substring, raise exception if not found
 135 def index(s, *args):
 136     """index(s, sub [,start [,end]]) -> int
 137
 138     Like find but raises ValueError when the substring is not found.
 139
 140     """
 141     return s.index(*args)
 142
 143 # Find last substring, raise exception if not found
 144 def rindex(s, *args):
 145     """rindex(s, sub [,start [,end]]) -> int
 146
 147     Like rfind but raises ValueError when the substring is not found.
 148
 149     """
 150     return s.rindex(*args)
 151
 152 # Count non-overlapping occurrences of substring
 153 def count(s, *args):
 154     """count(s, sub[, start[,end]]) -> int
 155
 156     Return the number of occurrences of substring sub in string
 157     s[start:end].  Optional arguments start and end are
 158     interpreted as in slice notation.
 159
 160     """
 161     return s.count(*args)
 162
 163 # Find substring, return -1 if not found
 164 def find(s, *args):
 165     """find(s, sub [,start [,end]]) -> in
 166
 167     Return the lowest index in s where substring sub is found,
 168     such that sub is contained within s[start,end].  Optional
 169     arguments start and end are interpreted as in slice notation.
 170
 171     Return -1 on failure.
 172
 173     """
 174     return s.find(*args)
 175
 176 # Find last substring, return -1 if not found
 177 def rfind(s, *args):
 178     """rfind(s, sub [,start [,end]]) -> int
 179
 180     Return the highest index in s where substring sub is found,
 181     such that sub is contained within s[start,end].  Optional
 182     arguments start and end are interpreted as in slice notation.
 183
 184     Return -1 on failure.
 185
 186     """
 187     return s.rfind(*args)
 188
 189 # for a bit of speed
 190 _float = float
 191 _int = int
 192 _long = long
 193 try:
 194     _StringTypes = (str, unicode)
 195 except NameError:
 196     _StringTypes = (str,)
 197
 198 # Convert string to float
 199 def atof(s):
 200     """atof(s) -> float
 201
 202     Return the floating point number represented by the string s.
 203
 204     """
 205     return _float(s)
 206
 207
 208 # Convert string to integer
 209 def atoi(s , base=10):
 210     """atoi(s [,base]) -> int
 211
 212     Return the integer represented by the string s in the given
 213     base, which defaults to 10.  The string s must consist of one
 214     or more digits, possibly preceded by a sign.  If base is 0, it
 215     is chosen from the leading characters of s, 0 for octal, 0x or
 216     0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
 217     accepted.
 218
 219     """
 220     return _int(s, base)
 221
 222
 223 # Convert string to long integer
 224 def atol(s, base=10):
 225     """atol(s [,base]) -> long
 226
 227     Return the long integer represented by the string s in the
 228     given base, which defaults to 10.  The string s must consist
 229     of one or more digits, possibly preceded by a sign.  If base
 230     is 0, it is chosen from the leading characters of s, 0 for
 231     octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
 232     0x or 0X is accepted.  A trailing L or l is not accepted,
 233     unless base is 0.
 234
 235     """
 236     return _long(s, base)
 237
 238
 239 # Left-justify a string
 240 def ljust(s, width):
 241     """ljust(s, width) -> string
 242
 243     Return a left-justified version of s, in a field of the
 244     specified width, padded with spaces as needed.  The string is
 245     never truncated.
 246
 247     """
 248     return s.ljust(width)
 249
 250 # Right-justify a string
 251 def rjust(s, width):
 252     """rjust(s, width) -> string
 253
 254     Return a right-justified version of s, in a field of the
 255     specified width, padded with spaces as needed.  The string is
 256     never truncated.
 257
 258     """
 259     return s.rjust(width)
 260
 261 # Center a string
 262 def center(s, width):
 263     """center(s, width) -> string
 264
 265     Return a center version of s, in a field of the specified
 266     width. padded with spaces as needed.  The string is never
 267     truncated.
 268
 269     """
 270     return s.center(width)
 271
 272 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
 273 # Decadent feature: the argument may be a string or a number
 274 # (Use of this is deprecated; it should be a string as with ljust c.s.)
 275 def zfill(x, width):
 276     """zfill(x, width) -> string
 277
 278     Pad a numeric string x with zeros on the left, to fill a field
 279     of the specified width.  The string x is never truncated.
 280
 281     """
 282     if not isinstance(x, _StringTypes):
 283         x = repr(x)
 284     return x.zfill(width)
 285
 286 # Expand tabs in a string.
 287 # Doesn't take non-printing chars into account, but does understand \n.
 288 def expandtabs(s, tabsize=8):
 289     """expandtabs(s [,tabsize]) -> string
 290
 291     Return a copy of the string s with all tab characters replaced
 292     by the appropriate number of spaces, depending on the current
 293     column, and the tabsize (default 8).
 294
 295     """
 296     return s.expandtabs(tabsize)
 297
 298 # Character translation through look-up table.
 299 def translate(s, table, deletions=""):
 300     """translate(s,table [,deletions]) -> string
 301
 302     Return a copy of the string s, where all characters occurring
 303     in the optional argument deletions are removed, and the
 304     remaining characters have been mapped through the given
 305     translation table, which must be a string of length 256.  The
 306     deletions argument is not allowed for Unicode strings.
 307
 308     """
 309     if deletions:
 310         return s.translate(table, deletions)
 311     else:
 312         # Add s[:0] so that if s is Unicode and table is an 8-bit string,
 313         # table is converted to Unicode.  This means that table *cannot*
 314         # be a dictionary -- for that feature, use u.translate() directly.
 315         return s.translate(table + s[:0])
 316
 317 # Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
 318 def capitalize(s):
 319     """capitalize(s) -> string
 320
 321     Return a copy of the string s with only its first character
 322     capitalized.
 323
 324     """
 325     return s.capitalize()
 326
 327 # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
 328 # See also regsub.capwords().
 329 def capwords(s, sep=None):
 330     """capwords(s, [sep]) -> string
 331
 332     Split the argument into words using split, capitalize each
 333     word using capitalize, and join the capitalized words using
 334     join. Note that this replaces runs of whitespace characters by
 335     a single space.
 336
 337     """
 338     return join(map(capitalize, s.split(sep)), sep or ' ')
 339
 340 # Construct a translation string
 341 _idmapL = None
 342 def maketrans(fromstr, tostr):
 343     """maketrans(frm, to) -> string
 344
 345     Return a translation table (a string of 256 bytes long)
 346     suitable for use in string.translate.  The strings frm and to
 347     must be of the same length.
 348
 349     """
 350     if len(fromstr) != len(tostr):
 351         raise ValueError, "maketrans arguments must have same length"
 352     global _idmapL
 353     if not _idmapL:
 354         _idmapL = map(None, _idmap)
 355     L = _idmapL[:]
 356     fromstr = map(ord, fromstr)
 357     for i in range(len(fromstr)):
 358         L[fromstr[i]] = tostr[i]
 359     return join(L, "")
 360
 361 # Substring replacement (global)
 362 def replace(s, old, new, maxsplit=-1):
 363     """replace (str, old, new[, maxsplit]) -> string
 364
 365     Return a copy of string str with all occurrences of substring
 366     old replaced by new. If the optional argument maxsplit is
 367     given, only the first maxsplit occurrences are replaced.
 368
 369     """
 370     return s.replace(old, new, maxsplit)
 371
 372
 373 # Try importing optional built-in module "strop" -- if it exists,
 374 # it redefines some string operations that are 100-1000 times faster.
 375 # It also defines values for whitespace, lowercase and uppercase
 376 # that match <ctype.h>'s definitions.
 377
# Override the pure-Python definitions with the ones from the optional
# strop module when it can be imported.  Only maketrans, lowercase,
# uppercase and whitespace are replaced, and letters is rebuilt from the
# imported lowercase/uppercase.  Names computed earlier in this file from
# the old values (such as printable and the ascii_* strings) are not
# recomputed here.
try:
    from strop import maketrans, lowercase, uppercase, whitespace
    letters = lowercase + uppercase
except ImportError:
    pass                                          # Use the original versions