Don't reference removed files in Makefile
[python/dscho.git] / Lib / string.py
blobf69d62d70522b3b4f0468b0edacf712b6441c24b
1 # module 'string' -- A collection of string operations
3 # Warning: most of the code you see here isn't normally used nowadays.
4 # At the end of this file most functions are replaced by built-in
5 # functions imported from built-in module "strop".
# Character-class strings, in the style of C's <ctype.h> classification.
# Each name is a string containing every character of that class.
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
octdigits = '01234567'
digits = octdigits + '89'
hexdigits = digits + 'abcdef' + 'ABCDEF'
whitespace = ' \t\n\r\v\f'
# Translation tables used by lower(), upper() and swapcase().
# Each table is a 256-character string indexed by ord(c): position k
# holds the character that chr(k) is converted to.
_idmap = ''
for i in range(256):
    _idmap = _idmap + chr(i)
del i  # don't leave the loop counter behind as a module attribute

# Identity everywhere except the letters of one case.
_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
# Start from the table that lowers A-Z, then make a-z map to upper case.
_swapcase = _lower[:ord('a')] + uppercase + _lower[ord('z')+1:]
# Backward compatible names for exceptions: every old per-function
# error name is simply another name for ValueError.
index_error = atoi_error = atof_error = atol_error = ValueError
# Return a copy of s in which UPPER CASE letters are converted to
# lower case.  Each character is looked up by its ordinal in the
# _lower translation table.
def lower(s):
    table = _lower
    out = ''
    for ch in s:
        out = out + table[ord(ch)]
    return out
# Return a copy of s in which lower case letters are converted to
# UPPER CASE.  Each character is looked up by its ordinal in the
# _upper translation table.
def upper(s):
    table = _upper
    out = ''
    for ch in s:
        out = out + table[ord(ch)]
    return out
# Return a copy of s with the case of every letter swapped: lower case
# becomes UPPER CASE and vice versa.  Each character is looked up by
# its ordinal in the _swapcase translation table.
def swapcase(s):
    table = _swapcase
    out = ''
    for ch in s:
        out = out + table[ord(ch)]
    return out
# Return s with leading and trailing whitespace removed.  "Whitespace"
# means any character in the module-level string `whitespace`, not
# only tabs and spaces.
def strip(s):
    start = 0
    end = len(s)
    # Advance past the leading whitespace...
    while start < end and s[start] in whitespace:
        start = start + 1
    # ...then back up over the trailing whitespace.
    while end > start and s[end-1] in whitespace:
        end = end - 1
    return s[start:end]
# Split a string into a list of words separated by runs of whitespace
# (any characters from `whitespace`).  Leading and trailing whitespace
# produce no empty words.
# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s):
    words = []
    pos, end = 0, len(s)
    while 1:
        # Skip the whitespace in front of the next word.
        while pos < end and s[pos] in whitespace:
            pos = pos + 1
        if pos >= end:
            break
        # Scan to the end of the word and record it.
        start = pos
        while pos < end and s[pos] not in whitespace:
            pos = pos + 1
        words.append(s[start:pos])
    return words
# Split the string s into a list of fields separated by the string sep.
# Adjacent separators yield empty fields, and the result always has at
# least one element.
# NB: splitfields(s, ' ') is NOT the same as split(s)!
# splitfields(s, '') returns [s] (in analogy with split() in nawk)
def splitfields(s, sep):
    seplen = len(sep)
    if seplen == 0:
        return [s]
    fields = []
    start = pos = 0
    # Last position at which a complete separator can still begin.
    limit = len(s) - seplen
    while pos <= limit:
        if s[pos:pos+seplen] != sep:
            pos = pos + 1
            continue
        fields.append(s[start:pos])
        pos = pos + seplen
        start = pos
    # Whatever follows the final separator is the last field.
    fields.append(s[start:])
    return fields
# Join a sequence of words into one string, with a single space
# between consecutive words.  Shorthand for joinfields(words, ' ').
def join(words):
    separator = ' '
    return joinfields(words, separator)
# Join a sequence of strings into one string, placing sep between
# consecutive items.  An empty sequence gives the empty string.
def joinfields(words, sep):
    skip = len(sep)
    joined = ''
    for word in words:
        # Every item, including the first, is prefixed with sep...
        joined = joined + (sep + word)
    # ...so the one leading separator is dropped at the end.
    return joined[skip:]
# Like find(), but raise ValueError instead of returning -1 when sub
# does not occur in s at or after position i.
def index(s, sub, i = 0):
    pos = find(s, sub, i)
    if pos >= 0:
        return pos
    raise ValueError('substring not found in string.index')
# Like rfind(), but raise ValueError instead of returning -1 when sub
# does not occur in s at or after position i.
def rindex(s, sub, i = 0):
    res = rfind(s, sub, i)
    if res < 0:
        # Name this function in the message, not string.index.
        raise ValueError('substring not found in string.rindex')
    return res
# Count the non-overlapping occurrences of sub in s, starting the scan
# at position i.  A negative i counts from the end of s and is clamped
# to the start of the string.  An empty sub matches once at every
# position from i through len(s); the result is never negative.
def count(s, sub, i = 0):
    if i < 0: i = max(0, i + len(s))
    n = len(sub)
    # m is one past the last position where sub can still fit.
    m = len(s) + 1 - n
    if n == 0:
        # A start index beyond the end of s leaves nothing to match;
        # clamp so the count cannot go negative.
        return max(0, m - i)
    r = 0
    while i < m:
        if sub == s[i:i+n]:
            r = r+1
            # Jump past the match so occurrences never overlap.
            i = i+n
        else:
            i = i+1
    return r
# Return the lowest position >= i at which sub occurs in s, or -1 if
# there is none.  A negative i counts from the end of s and is clamped
# to the start of the string.
def find(s, sub, i = 0):
    if i < 0:
        i = max(0, i + len(s))
    width = len(sub)
    # Candidate positions end where sub would no longer fit inside s.
    for pos in range(i, len(s) + 1 - width):
        if s[pos:pos+width] == sub:
            return pos
    return -1
# Return the highest position >= i at which sub occurs in s, or -1 if
# there is none.  A negative i counts from the end of s and is clamped
# to the start of the string.
def rfind(s, sub, i = 0):
    if i < 0:
        i = max(0, i + len(s))
    width = len(sub)
    # Walk backwards from the last position where sub still fits; the
    # first hit found this way is the rightmost occurrence.
    pos = len(s) - width
    while pos >= i:
        if s[pos:pos+width] == sub:
            return pos
        pos = pos - 1
    return -1
# Convert a string to a float.
# Accepts an optional sign, digits with an optional fractional part,
# and an optional exponent.  Raises ValueError for anything else.
# The text is checked against a pattern before it is handed to eval(),
# so only digits, '.', 'e'/'E' and signs can ever be evaluated.
def atof(str):
    import regex
    sign = ''
    s = str
    if s and s[0] in '+-':
        sign = s[0]
        s = s[1:]
    if not s:
        raise ValueError('non-float argument to string.atof')
    # Drop redundant leading zeros, keeping one if it is directly
    # followed by a non-digit (presumably so eval() is never shown an
    # octal-looking literal -- TODO confirm).
    while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:]
    if regex.match('[0-9]*\(\.[0-9]*\)?\([eE][-+]?[0-9]+\)?', s) != len(s):
        raise ValueError('non-float argument to string.atof')
    try:
        return float(eval(sign + s))
    except (SyntaxError, NameError):
        # SyntaxError: the pattern admits strings with no digits in the
        # mantissa (e.g. '.').  NameError: an exponent with no mantissa
        # (e.g. 'e5') parses as an identifier.  Neither is a float.
        raise ValueError('non-float argument to string.atof')
# Convert a string of decimal digits, with an optional leading sign, to
# an integer.  Raises ValueError if the string is empty, holds anything
# but digits after the sign, or if base is not 10.
def atoi(str, base=10):
    if base != 10:
        # We only get here if strop doesn't define atoi()
        raise ValueError("this string.atoi doesn't support base != 10")
    sign, body = '', str
    if body and body[0] in '+-':
        sign, body = body[0], body[1:]
    if not body:
        raise ValueError('non-integer argument to string.atoi')
    # Strip leading zeros (always keeping one digit) so that eval()
    # sees a plain decimal literal.
    while body[0] == '0' and len(body) > 1:
        body = body[1:]
    # Only digits may reach eval() below.
    for ch in body:
        if ch not in digits:
            raise ValueError('non-integer argument to string.atoi')
    return eval(sign + body)
# Convert a string of decimal digits, with an optional leading sign, to
# a long integer.  Raises ValueError if the string is empty, holds
# anything but digits after the sign, or if base is not 10.
def atol(str, base=10):
    if base != 10:
        # We only get here if strop doesn't define atol()
        raise ValueError("this string.atol doesn't support base != 10")
    sign, body = '', str
    if body and body[0] in '+-':
        sign, body = body[0], body[1:]
    if not body:
        raise ValueError('non-integer argument to string.atol')
    # Strip leading zeros (always keeping one digit) so that eval()
    # sees a plain decimal literal.
    while body[0] == '0' and len(body) > 1:
        body = body[1:]
    # Only digits may reach eval() below.
    for ch in body:
        if ch not in digits:
            raise ValueError('non-integer argument to string.atol')
    # The 'L' suffix makes the evaluated literal a long integer.
    return eval(sign + body + 'L')
# Left-justify s in a field of the given width by padding with spaces
# on the right.  s is returned unchanged if it already fills the field.
def ljust(s, width):
    pad = width - len(s)
    if pad > 0:
        return s + ' '*pad
    return s
# Right-justify s in a field of the given width by padding with spaces
# on the left.  s is returned unchanged if it already fills the field.
def rjust(s, width):
    pad = width - len(s)
    if pad > 0:
        return ' '*pad + s
    return s
# Center s in a field of the given width by padding with spaces on
# both sides.  s is returned unchanged if it already fills the field.
# When the padding cannot be split evenly, the extra space goes on the
# left for odd widths and on the right for even widths.
def center(s, width):
    n = width - len(s)
    if n <= 0: return s
    # divmod() always yields an integer quotient, so the result does
    # not depend on how the '/' operator divides integers.
    half, odd = divmod(n, 2)
    if odd and width%2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half+1
    return ' '*half + s + ' '*(n-half)
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Zeros are inserted after a leading '+' or '-' sign, if there is one.
# The value is returned unchanged if it is already at least `width`
# characters long; an empty string yields `width` zeros.
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    if type(x) == type(''): s = x
    else: s = repr(x)
    n = len(s)
    if n >= width: return s
    sign = ''
    # Slice rather than index: s may be the empty string here.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0'*(width-n) + s
# Expand tabs in a string: each tab becomes enough spaces to reach the
# next multiple of tabsize columns.
# Doesn't take non-printing chars into account, but does understand \n
# (the column count restarts after every newline).
def expandtabs(s, tabsize):
    out = ''
    col = 0  # characters emitted since the last newline
    for ch in s:
        if ch == '\t':
            pad = ' '*(tabsize - col%tabsize)
            out = out + pad
            col = col + len(pad)
        elif ch == '\n':
            out = out + ch
            col = 0
        else:
            out = out + ch
            col = col + 1
    return out
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import *
except ImportError:
    pass # Use the original, slow versions
else:
    # Rebuild letters from the lowercase and uppercase now in scope.
    letters = lowercase + uppercase