Lib/string.py

   1 # module 'string' -- A collection of string operations
   2
   3 # Warning: most of the code you see here isn't normally used nowadays.
   4 # At the end of this file most functions are replaced by built-in
   5 # functions imported from built-in module "strop".
   6
   7 # Some strings for ctype-style character classification
   8 whitespace = ' \t\n\r\v\f'
   9 lowercase = 'abcdefghijklmnopqrstuvwxyz'
  10 uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  11 letters = lowercase + uppercase
  12 digits = '0123456789'
  13 hexdigits = digits + 'abcdef' + 'ABCDEF'
  14 octdigits = '01234567'
  15
  16 # Case conversion helpers
  17 _idmap = ''
  18 for i in range(256): _idmap = _idmap + chr(i)
  19 _lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
  20 _upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
  21 _swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:]
  22 del i
  23
  24 # Backward compatible names for exceptions
  25 index_error = ValueError
  26 atoi_error = ValueError
  27 atof_error = ValueError
  28 atol_error = ValueError
  29
  30 # convert UPPER CASE letters to lower case
  31 def lower(s):
  32         res = ''
  33         for c in s:
  34                 res = res + _lower[ord(c)]
  35         return res
  36
  37 # Convert lower case letters to UPPER CASE
  38 def upper(s):
  39         res = ''
  40         for c in s:
  41                 res = res + _upper[ord(c)]
  42         return res
  43
  44 # Swap lower case letters and UPPER CASE
  45 def swapcase(s):
  46         res = ''
  47         for c in s:
  48                 res = res + _swapcase[ord(c)]
  49         return res
  50
  51 # Strip leading and trailing tabs and spaces
  52 def strip(s):
  53         i, j = 0, len(s)
  54         while i < j and s[i] in whitespace: i = i+1
  55         while i < j and s[j-1] in whitespace: j = j-1
  56         return s[i:j]
  57
  58 # Split a string into a list of space/tab-separated words
  59 # NB: split(s) is NOT the same as splitfields(s, ' ')!
  60 def split(s):
  61         res = []
  62         i, n = 0, len(s)
  63         while i < n:
  64                 while i < n and s[i] in whitespace: i = i+1
  65                 if i == n: break
  66                 j = i
  67                 while j < n and s[j] not in whitespace: j = j+1
  68                 res.append(s[i:j])
  69                 i = j
  70         return res
  71
  72 # Split a list into fields separated by a given string
  73 # NB: splitfields(s, ' ') is NOT the same as split(s)!
  74 # splitfields(s, '') returns [s] (in analogy with split() in nawk)
  75 def splitfields(s, sep):
  76         res = []
  77         nsep = len(sep)
  78         if nsep == 0:
  79                 return [s]
  80         ns = len(s)
  81         i = j = 0
  82         while j+nsep <= ns:
  83                 if s[j:j+nsep] == sep:
  84                         res.append(s[i:j])
  85                         i = j = j + nsep
  86                 else:
  87                         j = j + 1
  88         res.append(s[i:])
  89         return res
  90
  91 # Join words with spaces between them
  92 def join(words):
  93         return joinfields(words, ' ')
  94
  95 # Join fields with separator
  96 def joinfields(words, sep):
  97         res = ''
  98         for w in words:
  99                 res = res + (sep + w)
 100         return res[len(sep):]
 101
 102 # Find substring, raise exception if not found
 103 def index(s, sub, i = 0):
 104         res = find(s, sub, i)
 105         if res < 0:
 106                 raise ValueError, 'substring not found in string.index'
 107         return res
 108
 109 # Find last substring, raise exception if not found
 110 def rindex(s, sub, i = 0):
 111         res = rfind(s, sub, i)
 112         if res < 0:
 113                 raise ValueError, 'substring not found in string.index'
 114         return res
 115
 116 # Count non-overlapping occurrences of substring
 117 def count(s, sub, i = 0):
 118         if i < 0: i = max(0, i + len(s))
 119         n = len(sub)
 120         m = len(s) + 1 - n
 121         if n == 0: return m-i
 122         r = 0
 123         while i < m:
 124                 if sub == s[i:i+n]:
 125                         r = r+1
 126                         i = i+n
 127                 else:
 128                         i = i+1
 129         return r
 130
 131 # Find substring, return -1 if not found
 132 def find(s, sub, i = 0):
 133         if i < 0: i = max(0, i + len(s))
 134         n = len(sub)
 135         m = len(s) + 1 - n
 136         while i < m:
 137                 if sub == s[i:i+n]: return i
 138                 i = i+1
 139         return -1
 140
 141 # Find last substring, return -1 if not found
 142 def rfind(s, sub, i = 0):
 143         if i < 0: i = max(0, i + len(s))
 144         n = len(sub)
 145         m = len(s) + 1 - n
 146         r = -1
 147         while i < m:
 148                 if sub == s[i:i+n]: r = i
 149                 i = i+1
 150         return r
 151
 152 # Convert string to float
 153 def atof(str):
 154         import regex
 155         sign = ''
 156         s = str
 157         if s and s[0] in '+-':
 158                 sign = s[0]
 159                 s = s[1:]
 160         if not s:
 161                 raise ValueError, 'non-float argument to string.atof'
 162         while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:]
 163         if regex.match('[0-9]*\(\.[0-9]*\)?\([eE][-+]?[0-9]+\)?', s) != len(s):
 164                 raise ValueError, 'non-float argument to string.atof'
 165         try:
 166                 return float(eval(sign + s))
 167         except SyntaxError:
 168                 raise ValueError, 'non-float argument to string.atof'
 169
 170 # Convert string to integer
 171 def atoi(str, base=10):
 172         if base != 10:
 173                 # We only get here if strop doesn't define atoi()
 174                 raise ValueError, "this string.atoi doesn't support base != 10"
 175         sign = ''
 176         s = str
 177         if s and s[0] in '+-':
 178                 sign = s[0]
 179                 s = s[1:]
 180         if not s:
 181                 raise ValueError, 'non-integer argument to string.atoi'
 182         while s[0] == '0' and len(s) > 1: s = s[1:]
 183         for c in s:
 184                 if c not in digits:
 185                         raise ValueError, 'non-integer argument to string.atoi'
 186         return eval(sign + s)
 187
 188 # Convert string to long integer
 189 def atol(str, base=10):
 190         if base != 10:
 191                 # We only get here if strop doesn't define atol()
 192                 raise ValueError, "this string.atol doesn't support base != 10"
 193         sign = ''
 194         s = str
 195         if s and s[0] in '+-':
 196                 sign = s[0]
 197                 s = s[1:]
 198         if not s:
 199                 raise ValueError, 'non-integer argument to string.atol'
 200         while s[0] == '0' and len(s) > 1: s = s[1:]
 201         for c in s:
 202                 if c not in digits:
 203                         raise ValueError, 'non-integer argument to string.atol'
 204         return eval(sign + s + 'L')
 205
 206 # Left-justify a string
 207 def ljust(s, width):
 208         n = width - len(s)
 209         if n <= 0: return s
 210         return s + ' '*n
 211
 212 # Right-justify a string
 213 def rjust(s, width):
 214         n = width - len(s)
 215         if n <= 0: return s
 216         return ' '*n + s
 217
 218 # Center a string
 219 def center(s, width):
 220         n = width - len(s)
 221         if n <= 0: return s
 222         half = n/2
 223         if n%2 and width%2:
 224                 # This ensures that center(center(s, i), j) = center(s, j)
 225                 half = half+1
 226         return ' '*half +  s + ' '*(n-half)
 227
 228 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
 229 # Decadent feature: the argument may be a string or a number
 230 # (Use of this is deprecated; it should be a string as with ljust c.s.)
 231 def zfill(x, width):
 232         if type(x) == type(''): s = x
 233         else: s = `x`
 234         n = len(s)
 235         if n >= width: return s
 236         sign = ''
 237         if s[0] in ('-', '+'):
 238                 sign, s = s[0], s[1:]
 239         return sign + '0'*(width-n) + s
 240
 241 # Expand tabs in a string.
 242 # Doesn't take non-printing chars into account, but does understand \n.
 243 def expandtabs(s, tabsize):
 244         res = line = ''
 245         for c in s:
 246                 if c == '\t':
 247                         c = ' '*(tabsize - len(line)%tabsize)
 248                 line = line + c
 249                 if c == '\n':
 250                         res = res + line
 251                         line = ''
 252         return res + line
 253
 254
 255 # Try importing optional built-in module "strop" -- if it exists,
 256 # it redefines some string operations that are 100-1000 times faster.
 257 # It also defines values for whitespace, lowercase and uppercase
 258 # that match <ctype.h>'s definitions.
 259
 260 try:
 261         from strop import *
 262         letters = lowercase + uppercase
 263 except ImportError:
 264         pass # Use the original, slow versions