Lib/string.py

   1 # module 'string' -- A collection of string operations
   2
   3 # Warning: most of the code you see here isn't normally used nowadays.
   4 # At the end of this file most functions are replaced by built-in
   5 # functions imported from built-in module "strop".
   6
   7 # Some strings for ctype-style character classification
   8 whitespace = ' \t\n\r\v\f'
   9 lowercase = 'abcdefghijklmnopqrstuvwxyz'
  10 uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  11 letters = lowercase + uppercase
  12 digits = '0123456789'
  13 hexdigits = digits + 'abcdef' + 'ABCDEF'
  14 octdigits = '01234567'
  15
  16 # Case conversion helpers
  17 _idmap = ''
  18 for i in range(256): _idmap = _idmap + chr(i)
  19 _lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
  20 _upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
  21 _swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:]
  22 del i
  23
  24 # convert UPPER CASE letters to lower case
  25 def lower(s):
  26         res = ''
  27         for c in s:
  28                 res = res + _lower[ord(c)]
  29         return res
  30
  31 # Convert lower case letters to UPPER CASE
  32 def upper(s):
  33         res = ''
  34         for c in s:
  35                 res = res + _upper[ord(c)]
  36         return res
  37
  38 # Swap lower case letters and UPPER CASE
  39 def swapcase(s):
  40         res = ''
  41         for c in s:
  42                 res = res + _swapcase[ord(c)]
  43         return res
  44
  45 # Strip leading and trailing tabs and spaces
  46 def strip(s):
  47         i, j = 0, len(s)
  48         while i < j and s[i] in whitespace: i = i+1
  49         while i < j and s[j-1] in whitespace: j = j-1
  50         return s[i:j]
  51
  52 # Split a string into a list of space/tab-separated words
  53 # NB: split(s) is NOT the same as splitfields(s, ' ')!
  54 def split(s):
  55         res = []
  56         i, n = 0, len(s)
  57         while i < n:
  58                 while i < n and s[i] in whitespace: i = i+1
  59                 if i == n: break
  60                 j = i
  61                 while j < n and s[j] not in whitespace: j = j+1
  62                 res.append(s[i:j])
  63                 i = j
  64         return res
  65
  66 # Split a list into fields separated by a given string
  67 # NB: splitfields(s, ' ') is NOT the same as split(s)!
  68 # splitfields(s, '') returns [s] (in analogy with split() in nawk)
  69 def splitfields(s, sep):
  70         res = []
  71         nsep = len(sep)
  72         if nsep == 0:
  73                 return [s]
  74         ns = len(s)
  75         i = j = 0
  76         while j+nsep <= ns:
  77                 if s[j:j+nsep] == sep:
  78                         res.append(s[i:j])
  79                         i = j = j + nsep
  80                 else:
  81                         j = j + 1
  82         res.append(s[i:])
  83         return res
  84
  85 # Join words with spaces between them
  86 def join(words):
  87         return joinfields(words, ' ')
  88
  89 # Join fields with separator
  90 def joinfields(words, sep):
  91         res = ''
  92         for w in words:
  93                 res = res + (sep + w)
  94         return res[len(sep):]
  95
  96 # Find substring, raise exception if not found
  97 index_error = 'substring not found in string.index'
  98 def index(s, sub, *args):
  99         if args:
 100                 if len(args) > 1:
 101                         raise TypeError, 'string.index(): too many args'
 102                 i = args[0]
 103                 if i < 0: i = i + len(s)
 104         else:
 105                 i = 0
 106         n = len(sub)
 107         m = len(s) + 1 - n
 108         while i < m:
 109                 if sub == s[i:i+n]: return i
 110                 i = i+1
 111         raise index_error, (s, sub) + args
 112
 113 # Find last substring, raise exception if not found
 114 def rindex(s, sub, *args):
 115         if args:
 116                 if len(args) > 1:
 117                         raise TypeError, 'string.rindex(): too many args'
 118                 i = args[0]
 119                 if i < 0: i = i + len(s)
 120         else:
 121                 i = 0
 122         n = len(sub)
 123         m = len(s) + 1 - n
 124         r = None
 125         while i < m:
 126                 if sub == s[i:i+n]: r = i
 127                 i = i+1
 128         if r is None:
 129                 raise index_error, (s, sub) + args
 130         return r
 131
 132 # Find substring, return -1 if not found
 133 def find(*args):
 134         try:
 135                 return apply(index, args)
 136         except index_error:
 137                 return -1
 138
 139 # Find last substring, return -1 if not found
 140 def rfind(*args):
 141         try:
 142                 return apply(rindex, args)
 143         except index_error:
 144                 return -1
 145
 146 # Convert string to float
 147 atof_error = 'non-float argument to string.atof'
 148 def atof(str):
 149         import regex
 150         sign = ''
 151         s = str
 152         if s and s[0] in '+-':
 153                 sign = s[0]
 154                 s = s[1:]
 155         if not s: raise atof_error, str
 156         while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:]
 157         if regex.match('[0-9]*\(\.[0-9]*\)?\([eE][-+]?[0-9]+\)?', s) != len(s):
 158                 raise atof_error, str
 159         try:
 160                 return float(eval(sign + s))
 161         except SyntaxError:
 162                 raise atof_error, str
 163
 164 # Convert string to integer
 165 atoi_error = 'non-integer argument to string.atoi'
 166 def atoi(str):
 167         sign = ''
 168         s = str
 169         if s and s[0] in '+-':
 170                 sign = s[0]
 171                 s = s[1:]
 172         if not s: raise atoi_error, str
 173         while s[0] == '0' and len(s) > 1: s = s[1:]
 174         for c in s:
 175                 if c not in digits: raise atoi_error, str
 176         return eval(sign + s)
 177
 178 # Convert string to long integer
 179 atol_error = 'non-integer argument to string.atol'
 180 def atol(str):
 181         sign = ''
 182         s = str
 183         if s and s[0] in '+-':
 184                 sign = s[0]
 185                 s = s[1:]
 186         if not s: raise atoi_error, str
 187         while s[0] == '0' and len(s) > 1: s = s[1:]
 188         for c in s:
 189                 if c not in digits: raise atoi_error, str
 190         return eval(sign + s + 'L')
 191
 192 # Left-justify a string
 193 def ljust(s, width):
 194         n = width - len(s)
 195         if n <= 0: return s
 196         return s + ' '*n
 197
 198 # Right-justify a string
 199 def rjust(s, width):
 200         n = width - len(s)
 201         if n <= 0: return s
 202         return ' '*n + s
 203
 204 # Center a string
 205 def center(s, width):
 206         n = width - len(s)
 207         if n <= 0: return s
 208         half = n/2
 209         if n%2 and width%2:
 210                 # This ensures that center(center(s, i), j) = center(s, j)
 211                 half = half+1
 212         return ' '*half +  s + ' '*(n-half)
 213
 214 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
 215 # Decadent feature: the argument may be a string or a number
 216 # (Use of this is deprecated; it should be a string as with ljust c.s.)
 217 def zfill(x, width):
 218         if type(x) == type(''): s = x
 219         else: s = `x`
 220         n = len(s)
 221         if n >= width: return s
 222         sign = ''
 223         if s[0] in ('-', '+'):
 224                 sign, s = s[0], s[1:]
 225         return sign + '0'*(width-n) + s
 226
 227 # Expand tabs in a string.
 228 # Doesn't take non-printing chars into account, but does understand \n.
 229 def expandtabs(s, tabsize):
 230         res = line = ''
 231         for c in s:
 232                 if c == '\t':
 233                         c = ' '*(tabsize - len(line)%tabsize)
 234                 line = line + c
 235                 if c == '\n':
 236                         res = res + line
 237                         line = ''
 238         return res + line
 239
 240
 241 # Try importing optional built-in module "strop" -- if it exists,
 242 # it redefines some string operations that are 100-1000 times faster.
 243 # It also defines values for whitespace, lowercase and uppercase
 244 # that match <ctype.h>'s definitions.
 245
 246 try:
 247         from strop import *
 248         letters = lowercase + uppercase
 249 except ImportError:
 250         pass # Use the original, slow versions
 251
 252 # If certain functions are found, redefine the corresponding exceptions
 253 # as ValueError
 254
 255 try:
 256         from strop import index
 257         index_error = ValueError
 258 except ImportError:
 259         pass # Use the original, slow versions
 260
 261 try:
 262         from strop import atoi
 263         atoi_error = ValueError
 264 except ImportError:
 265         pass # Use the original, slow versions
 266
 267 try:
 268         from strop import atof
 269         atof_error = ValueError
 270 except ImportError:
 271         pass # Use the original, slow versions
 272
 273 try:
 274         from strop import atol
 275         atol_error = ValueError
 276 except ImportError:
 277         pass # Use the original, slow versions