1 # module 'string' -- A collection of string operations
3 # Warning: most of the code you see here isn't normally used nowadays.
4 # At the end of this file most functions are replaced by built-in
5 # functions imported from built-in module "strop".
7 """Common string manipulations.
9 Public module variables:
11 whitespace -- a string containing all characters considered whitespace
12 lowercase -- a string containing all characters considered lowercase letters
13 uppercase -- a string containing all characters considered uppercase letters
14 letters -- a string containing all characters considered letters
15 digits -- a string containing all characters considered decimal digits
16 hexdigits -- a string containing all characters considered hexadecimal digits
17 octdigits -- a string containing all characters considered octal digits
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
# digits has to be bound before hexdigits, which is built from it.
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'

# Case conversion helpers
# _idmap is the identity table: _idmap[n] == chr(n) for n in 0..255.
# It must start out as the empty string before the loop appends to it.
_idmap = ''
for i in range(256):
    _idmap = _idmap + chr(i)
# Each table is the identity table with one alphabetic range replaced,
# so that table[ord(c)] is the converted form of character c.
_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
# Built from _upper (lower case already mapped up), then the upper-case
# range is mapped down, which swaps the case of every ASCII letter.
_swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:]

# Backward compatible names for exceptions
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError
44 # convert UPPER CASE letters to lower case
48 Return a copy of the string s converted to lowercase.
53 res
= res
+ _lower
[ord(c
)]
56 # Convert lower case letters to UPPER CASE
60 Return a copy of the string s converted to uppercase.
65 res
= res
+ _upper
[ord(c
)]
68 # Swap lower case letters and UPPER CASE
70 """swapcase(s) -> string
72 Return a copy of the string s with upper case characters
73 converted to lowercase and vice versa.
78 res
= res
+ _swapcase
[ord(c
)]
81 # Strip leading and trailing whitespace
85 Return a copy of the string s with leading and trailing
90 while i
< j
and s
[i
] in whitespace
: i
= i
+1
91 while i
< j
and s
[j
-1] in whitespace
: j
= j
-1
94 # Strip leading whitespace
96 """lstrip(s) -> string
98 Return a copy of the string s with leading whitespace removed.
102 while i
< j
and s
[i
] in whitespace
: i
= i
+1
105 # Strip trailing whitespace
107 """rstrip(s) -> string
109 Return a copy of the string s with trailing whitespace
114 while i
< j
and s
[j
-1] in whitespace
: j
= j
-1
118 # Split a string into a list of whitespace-separated words
119 # NB: split(s) is NOT the same as splitfields(s, ' ')!
120 def split(s
, sep
=None, maxsplit
=0):
121 """split(str [,sep [,maxsplit]]) -> list of strings
123 Return a list of the words in the string s, using sep as the
124 delimiter string. If maxsplit is nonzero, splits into at most
125 maxsplit words.  If sep is not specified, any whitespace string
126 is a separator. Maxsplit defaults to 0.
128 (split and splitfields are synonymous)
131 if sep
is not None: return splitfields(s
, sep
, maxsplit
)
134 if maxsplit
<= 0: maxsplit
= n
137 while i
< n
and s
[i
] in whitespace
: i
= i
+1
139 if count
>= maxsplit
:
143 while j
< n
and s
[j
] not in whitespace
: j
= j
+1
149 # Split a string into fields separated by a given string
150 # NB: splitfields(s, ' ') is NOT the same as split(s)!
151 # splitfields(s, '') returns [s] (in analogy with split() in nawk)
152 def splitfields(s
, sep
=None, maxsplit
=0):
153 """splitfields(str [,sep [,maxsplit]]) -> list of strings
155 Return a list of the words in the string s, using sep as the
156 delimiter string. If maxsplit is nonzero, splits into at most
157 maxsplit words.  If sep is not specified, any whitespace string
158 is a separator. Maxsplit defaults to 0.
160 (split and splitfields are synonymous)
163 if sep
is None: return split(s
, None, maxsplit
)
169 if maxsplit
<= 0: maxsplit
= ns
173 if s
[j
:j
+nsep
] == sep
:
177 if count
>= maxsplit
: break
183 # Join words with spaces between them
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return a string composed of the words in list, with
    intervening occurrences of sep.  Sep defaults to a single
    space.

    (joinfields and join are synonymous)

    """
    return joinfields(words, sep)

# Join fields with optional separator
def joinfields(words, sep = ' '):
    """joinfields(list [,sep]) -> string

    Return a string composed of the words in list, with
    intervening occurrences of sep.  The default separator is a
    single space.

    (joinfields and join are synonymous)

    """
    # The accumulator must start empty; each word is appended with a
    # leading separator so the loop body needs no first-item special case.
    res = ''
    for w in words:
        res = res + (sep + w)
    # Drop the one separator that precedes the first word.  For an empty
    # list res is '' and the slice is still ''.
    return res[len(sep):]
212 # Find substring, raise exception if not found
def index(s, sub, i = 0, last=None):
    """index(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Raise ValueError if not found.

    """
    if last is None: last = len(s)
    res = find(s, sub, i, last)
    # find() reports failure with a negative result; only then is the
    # lookup turned into an exception.
    if res < 0:
        # Call form of raise is accepted by every Python version.
        raise ValueError('substring not found in string.index')
    return res
229 # Find last substring, raise exception if not found
def rindex(s, sub, i = 0, last=None):
    """rindex(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Raise ValueError if not found.

    """
    if last is None: last = len(s)
    res = rfind(s, sub, i, last)
    # rfind() reports failure with a negative result; only then is the
    # lookup turned into an exception.
    if res < 0:
        # The message names this function (it previously said
        # string.index, copied from index()).
        raise ValueError('substring not found in string.rindex')
    return res
246 # Count non-overlapping occurrences of substring
247 def count(s
, sub
, i
= 0, last
=None):
248 """count(s, sub[, start[,end]]) -> int
250 Return the number of occurrences of substring sub in string
251 s[start:end]. Optional arguments start and end are
252 interpreted as in slice notation.
255 Slen
= len(s
) # cache this value, for speed
259 last
= max(0, last
+ Slen
)
262 if i
< 0: i
= max(0, i
+ Slen
)
265 if n
== 0: return m
-i
275 # Find substring, return -1 if not found
276 def find(s
, sub
, i
= 0, last
=None):
277 """find(s, sub [,start [,end]]) -> int
279 Return the lowest index in s where substring sub is found,
280 such that sub is contained within s[start:end]. Optional
281 arguments start and end are interpreted as in slice notation.
283 Return -1 on failure.
286 Slen
= len(s
) # cache this value, for speed
290 last
= max(0, last
+ Slen
)
293 if i
< 0: i
= max(0, i
+ Slen
)
297 if sub
== s
[i
:i
+n
]: return i
301 # Find last substring, return -1 if not found
302 def rfind(s
, sub
, i
= 0, last
=None):
303 """rfind(s, sub [,start [,end]]) -> int
305 Return the highest index in s where substring sub is found,
306 such that sub is contained within s[start:end]. Optional
307 arguments start and end are interpreted as in slice notation.
309 Return -1 on failure.
312 Slen
= len(s
) # cache this value, for speed
316 last
= max(0, last
+ Slen
)
319 if i
< 0: i
= max(0, i
+ Slen
)
324 if sub
== s
[i
:i
+n
]: r
= i
328 # "Safe" environment for eval()
329 _safe_env
= {"__builtins__": {}}
331 # Convert string to float
336 Return the floating point number represented by the string s.
341 # Don't fail if re doesn't exist -- just skip the syntax check
350 if s
and s
[0] in '+-':
354 raise ValueError, 'non-float argument to string.atof'
355 while s
[0] == '0' and len(s
) > 1 and s
[1] in digits
: s
= s
[1:]
356 if _re
and not _re
.match('[0-9]*(\.[0-9]*)?([eE][-+]?[0-9]+)?$', s
):
357 raise ValueError, 'non-float argument to string.atof'
359 return float(eval(sign
+ s
, _safe_env
))
361 raise ValueError, 'non-float argument to string.atof'
363 # Convert string to integer
364 def atoi(str, base
=10):
365 """atoi(s [,base]) -> int
367 Return the integer represented by the string s in the given
368 base, which defaults to 10. The string s must consist of one
369 or more digits, possibly preceded by a sign. If base is 0, it
370 is chosen from the leading characters of s, 0 for octal, 0x or
371 0X for hexadecimal. If base is 16, a preceding 0x or 0X is
376 # We only get here if strop doesn't define atoi()
377 raise ValueError, "this string.atoi doesn't support base != 10"
380 if s
and s
[0] in '+-':
384 raise ValueError, 'non-integer argument to string.atoi'
385 while s
[0] == '0' and len(s
) > 1: s
= s
[1:]
388 raise ValueError, 'non-integer argument to string.atoi'
389 return eval(sign
+ s
, _safe_env
)
391 # Convert string to long integer
392 def atol(str, base
=10):
393 """atol(s [,base]) -> long
395 Return the long integer represented by the string s in the
396 given base, which defaults to 10. The string s must consist
397 of one or more digits, possibly preceded by a sign. If base
398 is 0, it is chosen from the leading characters of s, 0 for
399 octal, 0x or 0X for hexadecimal. If base is 16, a preceding
400 0x or 0X is accepted. A trailing L or l is not accepted,
405 # We only get here if strop doesn't define atol()
406 raise ValueError, "this string.atol doesn't support base != 10"
409 if s
and s
[0] in '+-':
413 raise ValueError, 'non-integer argument to string.atol'
414 while s
[0] == '0' and len(s
) > 1: s
= s
[1:]
417 raise ValueError, 'non-integer argument to string.atol'
418 return eval(sign
+ s
+ 'L', _safe_env
)
420 # Left-justify a string
422 """ljust(s, width) -> string
424 Return a left-justified version of s, in a field of the
425 specified width, padded with spaces as needed. The string is
433 # Right-justify a string
435 """rjust(s, width) -> string
437 Return a right-justified version of s, in a field of the
438 specified width, padded with spaces as needed. The string is
447 def center(s
, width
):
448 """center(s, width) -> string
450 Return a centered version of s, in a field of the specified
451 width, padded with spaces as needed. The string is never
459 # This ensures that center(center(s, i), j) = center(s, j)
461 return ' '*half
+ s
+ ' '*(n
-half
)
463 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
464 # Decadent feature: the argument may be a string or a number
465 # (Use of this is deprecated; it should be a string as with ljust etc.)
467 """zfill(x, width) -> string
469 Pad a numeric string x with zeros on the left, to fill a field
470 of the specified width. The string x is never truncated.
473 if type(x
) == type(''): s
= x
476 if n
>= width
: return s
478 if s
[0] in ('-', '+'):
479 sign
, s
= s
[0], s
[1:]
480 return sign
+ '0'*(width
-n
) + s
482 # Expand tabs in a string.
483 # Doesn't take non-printing chars into account, but does understand \n.
484 def expandtabs(s
, tabsize
=8):
485 """expandtabs(s [,tabsize]) -> string
487 Return a copy of the string s with all tab characters replaced
488 by the appropriate number of spaces, depending on the current
489 column, and the tabsize (default 8).
495 c
= ' '*(tabsize
- len(line
)%tabsize
)
502 # Character translation through look-up table.
503 def translate(s
, table
, deletions
=""):
504 """translate(s,table [,deletechars]) -> string
506 Return a copy of the string s, where all characters occurring
507 in the optional argument deletechars are removed, and the
508 remaining characters have been mapped through the given
509 translation table, which must be a string of length 256.
512 if type(table
) != type('') or len(table
) != 256:
514 "translation table must be 256 characters long"
517 if c
not in deletions
:
518 res
= res
+ table
[ord(c
)]
521 # Capitalize a string, e.g. "aBc dEf" -> "Abc def".
523 """capitalize(s) -> string
525 Return a copy of the string s with only its first character
529 return upper(s
[:1]) + lower(s
[1:])
531 # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
532 # See also regsub.capwords().
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  When sep is not given, the words are joined with a
    single space.

    """
    # sep is passed to split() as given; join() falls back to a single
    # space when sep is None (or otherwise false).
    return join(map(capitalize, split(s, sep)), sep or ' ')
544 # Construct a translation string
546 def maketrans(fromstr
, tostr
):
547 """maketrans(frm, to) -> string
549 Return a translation table (a string 256 bytes long)
550 suitable for use in string.translate. The strings frm and to
551 must be of the same length.
554 if len(fromstr
) != len(tostr
):
555 raise ValueError, "maketrans arguments must have same length"
558 _idmapL
= map(None, _idmap
)
560 fromstr
= map(ord, fromstr
)
561 for i
in range(len(fromstr
)):
562 L
[fromstr
[i
]] = tostr
[i
]
563 return joinfields(L
, "")
565 # Substring replacement (global)
def replace(str, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new. If the optional argument maxsplit is
    given, only the first maxsplit occurrences are replaced.

    """
    # Splitting on old and re-joining with new substitutes every
    # separator that splitfields() consumed.
    return joinfields(splitfields(str, old, maxsplit), new)
577 # Try importing optional built-in module "strop" -- if it exists,
578 # it redefines some string operations that are 100-1000 times faster.
579 # It also defines values for whitespace, lowercase and uppercase
580 # that match <ctype.h>'s definitions.
584 letters
= lowercase
+ uppercase
586 pass # Use the original, slow versions