Lib/string.py

   1 """A collection of string operations (most are no longer used in Python 1.6).
   2
   3 Warning: most of the code you see here isn't normally used nowadays.  With
   4 Python 1.6, many of these functions are implemented as methods on the
   5 standard string object. They used to be implemented by a built-in module
   6 called strop, but strop is now obsolete itself.
   7
   8 Public module variables:
   9
  10 whitespace -- a string containing all characters considered whitespace
  11 lowercase -- a string containing all characters considered lowercase letters
  12 uppercase -- a string containing all characters considered uppercase letters
  13 letters -- a string containing all characters considered letters
  14 digits -- a string containing all characters considered decimal digits
  15 hexdigits -- a string containing all characters considered hexadecimal digits
  16 octdigits -- a string containing all characters considered octal digits
  17 punctuation -- a string containing all characters considered punctuation
  18 printable -- a string containing all characters considered printable
  19
  20 """
  21
  22 # Some strings for ctype-style character classification
  23 whitespace = ' \t\n\r\v\f'
  24 lowercase = 'abcdefghijklmnopqrstuvwxyz'
  25 uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  26 letters = lowercase + uppercase
  27 ascii_lowercase = lowercase
  28 ascii_uppercase = uppercase
  29 ascii_letters = ascii_lowercase + ascii_uppercase
  30 digits = '0123456789'
  31 hexdigits = digits + 'abcdef' + 'ABCDEF'
  32 octdigits = '01234567'
  33 punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
  34 printable = digits + letters + punctuation + whitespace
  35
  36 # Case conversion helpers
  37 _idmap = ''
  38 for i in range(256): _idmap = _idmap + chr(i)
  39 del i
  40
  41 # Backward compatible names for exceptions
  42 index_error = ValueError
  43 atoi_error = ValueError
  44 atof_error = ValueError
  45 atol_error = ValueError
  46
  47 # convert UPPER CASE letters to lower case
  48 def lower(s):
  49     """lower(s) -> string
  50
  51     Return a copy of the string s converted to lowercase.
  52
  53     """
  54     return s.lower()
  55
  56 # Convert lower case letters to UPPER CASE
  57 def upper(s):
  58     """upper(s) -> string
  59
  60     Return a copy of the string s converted to uppercase.
  61
  62     """
  63     return s.upper()
  64
  65 # Swap lower case letters and UPPER CASE
  66 def swapcase(s):
  67     """swapcase(s) -> string
  68
  69     Return a copy of the string s with upper case characters
  70     converted to lowercase and vice versa.
  71
  72     """
  73     return s.swapcase()
  74
  75 # Strip leading and trailing tabs and spaces
  76 def strip(s):
  77     """strip(s) -> string
  78
  79     Return a copy of the string s with leading and trailing
  80     whitespace removed.
  81
  82     """
  83     return s.strip()
  84
  85 # Strip leading tabs and spaces
  86 def lstrip(s):
  87     """lstrip(s) -> string
  88
  89     Return a copy of the string s with leading whitespace removed.
  90
  91     """
  92     return s.lstrip()
  93
  94 # Strip trailing tabs and spaces
  95 def rstrip(s):
  96     """rstrip(s) -> string
  97
  98     Return a copy of the string s with trailing whitespace
  99     removed.
 100
 101     """
 102     return s.rstrip()
 103
 104
 105 # Split a string into a list of space/tab-separated words
 106 def split(s, sep=None, maxsplit=-1):
 107     """split(s [,sep [,maxsplit]]) -> list of strings
 108
 109     Return a list of the words in the string s, using sep as the
 110     delimiter string.  If maxsplit is given, splits at no more than
 111     maxsplit places (resulting in at most maxsplit+1 words).  If sep
 112     is not specified, any whitespace string is a separator.
 113
 114     (split and splitfields are synonymous)
 115
 116     """
 117     return s.split(sep, maxsplit)
 118 splitfields = split
 119
 120 # Join fields with optional separator
 121 def join(words, sep = ' '):
 122     """join(list [,sep]) -> string
 123
 124     Return a string composed of the words in list, with
 125     intervening occurrences of sep.  The default separator is a
 126     single space.
 127
 128     (joinfields and join are synonymous)
 129
 130     """
 131     return sep.join(words)
 132 joinfields = join
 133
 134 # Find substring, raise exception if not found
 135 def index(s, *args):
 136     """index(s, sub [,start [,end]]) -> int
 137
 138     Like find but raises ValueError when the substring is not found.
 139
 140     """
 141     return s.index(*args)
 142
 143 # Find last substring, raise exception if not found
 144 def rindex(s, *args):
 145     """rindex(s, sub [,start [,end]]) -> int
 146
 147     Like rfind but raises ValueError when the substring is not found.
 148
 149     """
 150     return s.rindex(*args)
 151
 152 # Count non-overlapping occurrences of substring
 153 def count(s, *args):
 154     """count(s, sub[, start[,end]]) -> int
 155
 156     Return the number of occurrences of substring sub in string
 157     s[start:end].  Optional arguments start and end are
 158     interpreted as in slice notation.
 159
 160     """
 161     return s.count(*args)
 162
 163 # Find substring, return -1 if not found
 164 def find(s, *args):
 165     """find(s, sub [,start [,end]]) -> in
 166
 167     Return the lowest index in s where substring sub is found,
 168     such that sub is contained within s[start,end].  Optional
 169     arguments start and end are interpreted as in slice notation.
 170
 171     Return -1 on failure.
 172
 173     """
 174     return s.find(*args)
 175
 176 # Find last substring, return -1 if not found
 177 def rfind(s, *args):
 178     """rfind(s, sub [,start [,end]]) -> int
 179
 180     Return the highest index in s where substring sub is found,
 181     such that sub is contained within s[start,end].  Optional
 182     arguments start and end are interpreted as in slice notation.
 183
 184     Return -1 on failure.
 185
 186     """
 187     return s.rfind(*args)
 188
 189 # for a bit of speed
 190 _float = float
 191 _int = int
 192 _long = long
 193
 194 # Convert string to float
 195 def atof(s):
 196     """atof(s) -> float
 197
 198     Return the floating point number represented by the string s.
 199
 200     """
 201     return _float(s)
 202
 203
 204 # Convert string to integer
 205 def atoi(s , base=10):
 206     """atoi(s [,base]) -> int
 207
 208     Return the integer represented by the string s in the given
 209     base, which defaults to 10.  The string s must consist of one
 210     or more digits, possibly preceded by a sign.  If base is 0, it
 211     is chosen from the leading characters of s, 0 for octal, 0x or
 212     0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
 213     accepted.
 214
 215     """
 216     return _int(s, base)
 217
 218
 219 # Convert string to long integer
 220 def atol(s, base=10):
 221     """atol(s [,base]) -> long
 222
 223     Return the long integer represented by the string s in the
 224     given base, which defaults to 10.  The string s must consist
 225     of one or more digits, possibly preceded by a sign.  If base
 226     is 0, it is chosen from the leading characters of s, 0 for
 227     octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
 228     0x or 0X is accepted.  A trailing L or l is not accepted,
 229     unless base is 0.
 230
 231     """
 232     return _long(s, base)
 233
 234
 235 # Left-justify a string
 236 def ljust(s, width):
 237     """ljust(s, width) -> string
 238
 239     Return a left-justified version of s, in a field of the
 240     specified width, padded with spaces as needed.  The string is
 241     never truncated.
 242
 243     """
 244     return s.ljust(width)
 245
 246 # Right-justify a string
 247 def rjust(s, width):
 248     """rjust(s, width) -> string
 249
 250     Return a right-justified version of s, in a field of the
 251     specified width, padded with spaces as needed.  The string is
 252     never truncated.
 253
 254     """
 255     return s.rjust(width)
 256
 257 # Center a string
 258 def center(s, width):
 259     """center(s, width) -> string
 260
 261     Return a center version of s, in a field of the specified
 262     width. padded with spaces as needed.  The string is never
 263     truncated.
 264
 265     """
 266     return s.center(width)
 267
 268 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
 269 # Decadent feature: the argument may be a string or a number
 270 # (Use of this is deprecated; it should be a string as with ljust c.s.)
 271 def zfill(x, width):
 272     """zfill(x, width) -> string
 273
 274     Pad a numeric string x with zeros on the left, to fill a field
 275     of the specified width.  The string x is never truncated.
 276
 277     """
 278     if not isinstance(x, basestring):
 279         x = repr(x)
 280     return x.zfill(width)
 281
 282 # Expand tabs in a string.
 283 # Doesn't take non-printing chars into account, but does understand \n.
 284 def expandtabs(s, tabsize=8):
 285     """expandtabs(s [,tabsize]) -> string
 286
 287     Return a copy of the string s with all tab characters replaced
 288     by the appropriate number of spaces, depending on the current
 289     column, and the tabsize (default 8).
 290
 291     """
 292     return s.expandtabs(tabsize)
 293
 294 # Character translation through look-up table.
 295 def translate(s, table, deletions=""):
 296     """translate(s,table [,deletions]) -> string
 297
 298     Return a copy of the string s, where all characters occurring
 299     in the optional argument deletions are removed, and the
 300     remaining characters have been mapped through the given
 301     translation table, which must be a string of length 256.  The
 302     deletions argument is not allowed for Unicode strings.
 303
 304     """
 305     if deletions:
 306         return s.translate(table, deletions)
 307     else:
 308         # Add s[:0] so that if s is Unicode and table is an 8-bit string,
 309         # table is converted to Unicode.  This means that table *cannot*
 310         # be a dictionary -- for that feature, use u.translate() directly.
 311         return s.translate(table + s[:0])
 312
 313 # Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
 314 def capitalize(s):
 315     """capitalize(s) -> string
 316
 317     Return a copy of the string s with only its first character
 318     capitalized.
 319
 320     """
 321     return s.capitalize()
 322
 323 # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
 324 # See also regsub.capwords().
 325 def capwords(s, sep=None):
 326     """capwords(s, [sep]) -> string
 327
 328     Split the argument into words using split, capitalize each
 329     word using capitalize, and join the capitalized words using
 330     join. Note that this replaces runs of whitespace characters by
 331     a single space.
 332
 333     """
 334     return join(map(capitalize, s.split(sep)), sep or ' ')
 335
 336 # Construct a translation string
 337 _idmapL = None
 338 def maketrans(fromstr, tostr):
 339     """maketrans(frm, to) -> string
 340
 341     Return a translation table (a string of 256 bytes long)
 342     suitable for use in string.translate.  The strings frm and to
 343     must be of the same length.
 344
 345     """
 346     if len(fromstr) != len(tostr):
 347         raise ValueError, "maketrans arguments must have same length"
 348     global _idmapL
 349     if not _idmapL:
 350         _idmapL = map(None, _idmap)
 351     L = _idmapL[:]
 352     fromstr = map(ord, fromstr)
 353     for i in range(len(fromstr)):
 354         L[fromstr[i]] = tostr[i]
 355     return join(L, "")
 356
 357 # Substring replacement (global)
 358 def replace(s, old, new, maxsplit=-1):
 359     """replace (str, old, new[, maxsplit]) -> string
 360
 361     Return a copy of string str with all occurrences of substring
 362     old replaced by new. If the optional argument maxsplit is
 363     given, only the first maxsplit occurrences are replaced.
 364
 365     """
 366     return s.replace(old, new, maxsplit)
 367
 368
 369 # Try importing optional built-in module "strop" -- if it exists,
 370 # it redefines some string operations that are 100-1000 times faster.
 371 # It also defines values for whitespace, lowercase and uppercase
 372 # that match <ctype.h>'s definitions.
 373
 374 try:
 375     from strop import maketrans, lowercase, uppercase, whitespace
 376     letters = lowercase + uppercase
 377 except ImportError:
 378     pass                                          # Use the original versions