Lib/string.py

   1 # module 'string' -- A collection of string operations
   2
   3 # Warning: most of the code you see here isn't normally used nowadays.
   4 # At the end of this file most functions are replaced by built-in
   5 # functions imported from built-in module "strop".
   6
   7 # Some strings for ctype-style character classification
   8 whitespace = ' \t\n\r\v\f'
   9 lowercase = 'abcdefghijklmnopqrstuvwxyz'
  10 uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  11 letters = lowercase + uppercase
  12 digits = '0123456789'
  13 hexdigits = digits + 'abcdef' + 'ABCDEF'
  14 octdigits = '01234567'
  15
  16 # Case conversion helpers
  17 _idmap = ''
  18 for i in range(256): _idmap = _idmap + chr(i)
  19 _lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
  20 _upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
  21 _swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:]
  22 del i
  23
  24 # Backward compatible names for exceptions
  25 index_error = ValueError
  26 atoi_error = ValueError
  27 atof_error = ValueError
  28 atol_error = ValueError
  29
  30 # convert UPPER CASE letters to lower case
  31 def lower(s):
  32         res = ''
  33         for c in s:
  34                 res = res + _lower[ord(c)]
  35         return res
  36
  37 # Convert lower case letters to UPPER CASE
  38 def upper(s):
  39         res = ''
  40         for c in s:
  41                 res = res + _upper[ord(c)]
  42         return res
  43
  44 # Swap lower case letters and UPPER CASE
  45 def swapcase(s):
  46         res = ''
  47         for c in s:
  48                 res = res + _swapcase[ord(c)]
  49         return res
  50
  51 # Strip leading and trailing tabs and spaces
  52 def strip(s):
  53         i, j = 0, len(s)
  54         while i < j and s[i] in whitespace: i = i+1
  55         while i < j and s[j-1] in whitespace: j = j-1
  56         return s[i:j]
  57
  58 # Split a string into a list of space/tab-separated words
  59 # NB: split(s) is NOT the same as splitfields(s, ' ')!
  60 def split(s, sep=None):
  61         if sep is not None: return splitfields(s, sep)
  62         res = []
  63         i, n = 0, len(s)
  64         while i < n:
  65                 while i < n and s[i] in whitespace: i = i+1
  66                 if i == n: break
  67                 j = i
  68                 while j < n and s[j] not in whitespace: j = j+1
  69                 res.append(s[i:j])
  70                 i = j
  71         return res
  72
  73 # Split a list into fields separated by a given string
  74 # NB: splitfields(s, ' ') is NOT the same as split(s)!
  75 # splitfields(s, '') returns [s] (in analogy with split() in nawk)
  76 def splitfields(s, sep=None):
  77         if sep is None: return split(s)
  78         res = []
  79         nsep = len(sep)
  80         if nsep == 0:
  81                 return [s]
  82         ns = len(s)
  83         i = j = 0
  84         while j+nsep <= ns:
  85                 if s[j:j+nsep] == sep:
  86                         res.append(s[i:j])
  87                         i = j = j + nsep
  88                 else:
  89                         j = j + 1
  90         res.append(s[i:])
  91         return res
  92
  93 # Join words with spaces between them
  94 def join(words, sep = ' '):
  95         return joinfields(words, sep)
  96
  97 # Join fields with optional separator
  98 def joinfields(words, sep = ' '):
  99         res = ''
 100         for w in words:
 101                 res = res + (sep + w)
 102         return res[len(sep):]
 103
 104 # Find substring, raise exception if not found
 105 def index(s, sub, i = 0):
 106         res = find(s, sub, i)
 107         if res < 0:
 108                 raise ValueError, 'substring not found in string.index'
 109         return res
 110
 111 # Find last substring, raise exception if not found
 112 def rindex(s, sub, i = 0):
 113         res = rfind(s, sub, i)
 114         if res < 0:
 115                 raise ValueError, 'substring not found in string.index'
 116         return res
 117
 118 # Count non-overlapping occurrences of substring
 119 def count(s, sub, i = 0):
 120         if i < 0: i = max(0, i + len(s))
 121         n = len(sub)
 122         m = len(s) + 1 - n
 123         if n == 0: return m-i
 124         r = 0
 125         while i < m:
 126                 if sub == s[i:i+n]:
 127                         r = r+1
 128                         i = i+n
 129                 else:
 130                         i = i+1
 131         return r
 132
 133 # Find substring, return -1 if not found
 134 def find(s, sub, i = 0):
 135         if i < 0: i = max(0, i + len(s))
 136         n = len(sub)
 137         m = len(s) + 1 - n
 138         while i < m:
 139                 if sub == s[i:i+n]: return i
 140                 i = i+1
 141         return -1
 142
 143 # Find last substring, return -1 if not found
 144 def rfind(s, sub, i = 0):
 145         if i < 0: i = max(0, i + len(s))
 146         n = len(sub)
 147         m = len(s) + 1 - n
 148         r = -1
 149         while i < m:
 150                 if sub == s[i:i+n]: r = i
 151                 i = i+1
 152         return r
 153
 154 # Convert string to float
 155 def atof(str):
 156         import regex
 157         sign = ''
 158         s = str
 159         if s and s[0] in '+-':
 160                 sign = s[0]
 161                 s = s[1:]
 162         if not s:
 163                 raise ValueError, 'non-float argument to string.atof'
 164         while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:]
 165         if regex.match('[0-9]*\(\.[0-9]*\)?\([eE][-+]?[0-9]+\)?', s) != len(s):
 166                 raise ValueError, 'non-float argument to string.atof'
 167         try:
 168                 return float(eval(sign + s))
 169         except SyntaxError:
 170                 raise ValueError, 'non-float argument to string.atof'
 171
 172 # Convert string to integer
 173 def atoi(str, base=10):
 174         if base != 10:
 175                 # We only get here if strop doesn't define atoi()
 176                 raise ValueError, "this string.atoi doesn't support base != 10"
 177         sign = ''
 178         s = str
 179         if s and s[0] in '+-':
 180                 sign = s[0]
 181                 s = s[1:]
 182         if not s:
 183                 raise ValueError, 'non-integer argument to string.atoi'
 184         while s[0] == '0' and len(s) > 1: s = s[1:]
 185         for c in s:
 186                 if c not in digits:
 187                         raise ValueError, 'non-integer argument to string.atoi'
 188         return eval(sign + s)
 189
 190 # Convert string to long integer
 191 def atol(str, base=10):
 192         if base != 10:
 193                 # We only get here if strop doesn't define atol()
 194                 raise ValueError, "this string.atol doesn't support base != 10"
 195         sign = ''
 196         s = str
 197         if s and s[0] in '+-':
 198                 sign = s[0]
 199                 s = s[1:]
 200         if not s:
 201                 raise ValueError, 'non-integer argument to string.atol'
 202         while s[0] == '0' and len(s) > 1: s = s[1:]
 203         for c in s:
 204                 if c not in digits:
 205                         raise ValueError, 'non-integer argument to string.atol'
 206         return eval(sign + s + 'L')
 207
 208 # Left-justify a string
 209 def ljust(s, width):
 210         n = width - len(s)
 211         if n <= 0: return s
 212         return s + ' '*n
 213
 214 # Right-justify a string
 215 def rjust(s, width):
 216         n = width - len(s)
 217         if n <= 0: return s
 218         return ' '*n + s
 219
 220 # Center a string
 221 def center(s, width):
 222         n = width - len(s)
 223         if n <= 0: return s
 224         half = n/2
 225         if n%2 and width%2:
 226                 # This ensures that center(center(s, i), j) = center(s, j)
 227                 half = half+1
 228         return ' '*half +  s + ' '*(n-half)
 229
 230 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
 231 # Decadent feature: the argument may be a string or a number
 232 # (Use of this is deprecated; it should be a string as with ljust c.s.)
 233 def zfill(x, width):
 234         if type(x) == type(''): s = x
 235         else: s = `x`
 236         n = len(s)
 237         if n >= width: return s
 238         sign = ''
 239         if s[0] in ('-', '+'):
 240                 sign, s = s[0], s[1:]
 241         return sign + '0'*(width-n) + s
 242
 243 # Expand tabs in a string.
 244 # Doesn't take non-printing chars into account, but does understand \n.
 245 def expandtabs(s, tabsize=8):
 246         res = line = ''
 247         for c in s:
 248                 if c == '\t':
 249                         c = ' '*(tabsize - len(line)%tabsize)
 250                 line = line + c
 251                 if c == '\n':
 252                         res = res + line
 253                         line = ''
 254         return res + line
 255
 256
 257 # Try importing optional built-in module "strop" -- if it exists,
 258 # it redefines some string operations that are 100-1000 times faster.
 259 # It also defines values for whitespace, lowercase and uppercase
 260 # that match <ctype.h>'s definitions.
 261
 262 try:
 263         from strop import *
 264         letters = lowercase + uppercase
 265 except ImportError:
 266         pass # Use the original, slow versions