This commit was manufactured by cvs2svn to create tag 'r234c1'.
[python/dscho.git] / Doc / lib / libstring.tex
blob3f902cf8403fd13c7ca6101906fcce8337558f19
1 \section{\module{string} ---
2 Common string operations}
4 \declaremodule{standard}{string}
5 \modulesynopsis{Common string operations.}
8 This module defines some constants useful for checking character
9 classes and some useful string functions. See the module
10 \refmodule{re}\refstmodindex{re} for string functions based on regular
11 expressions.
13 The constants defined in this module are:
15 \begin{datadesc}{ascii_letters}
16 The concatenation of the \constant{ascii_lowercase} and
17 \constant{ascii_uppercase} constants described below. This value is
18 not locale-dependent.
19 \end{datadesc}
21 \begin{datadesc}{ascii_lowercase}
22 The lowercase letters \code{'abcdefghijklmnopqrstuvwxyz'}. This
23 value is not locale-dependent and will not change.
24 \end{datadesc}
26 \begin{datadesc}{ascii_uppercase}
27 The uppercase letters \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}. This
28 value is not locale-dependent and will not change.
29 \end{datadesc}
31 \begin{datadesc}{digits}
32 The string \code{'0123456789'}.
33 \end{datadesc}
35 \begin{datadesc}{hexdigits}
36 The string \code{'0123456789abcdefABCDEF'}.
37 \end{datadesc}
39 \begin{datadesc}{letters}
40 The concatenation of the strings \constant{lowercase} and
41 \constant{uppercase} described below. The specific value is
42 locale-dependent, and will be updated when
43 \function{locale.setlocale()} is called.
44 \end{datadesc}
46 \begin{datadesc}{lowercase}
47 A string containing all the characters that are considered lowercase
48 letters. On most systems this is the string
49 \code{'abcdefghijklmnopqrstuvwxyz'}. Do not change its definition ---
50 the effect on the routines \function{upper()} and
51 \function{swapcase()} is undefined. The specific value is
52 locale-dependent, and will be updated when
53 \function{locale.setlocale()} is called.
54 \end{datadesc}
56 \begin{datadesc}{octdigits}
57 The string \code{'01234567'}.
58 \end{datadesc}
60 \begin{datadesc}{punctuation}
61 String of \ASCII{} characters which are considered punctuation
62 characters in the \samp{C} locale.
63 \end{datadesc}
65 \begin{datadesc}{printable}
66 String of characters which are considered printable. This is a
67 combination of \constant{digits}, \constant{letters},
68 \constant{punctuation}, and \constant{whitespace}.
69 \end{datadesc}
71 \begin{datadesc}{uppercase}
72 A string containing all the characters that are considered uppercase
73 letters. On most systems this is the string
74 \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}. Do not change its definition ---
75 the effect on the routines \function{lower()} and
76 \function{swapcase()} is undefined. The specific value is
77 locale-dependent, and will be updated when
78 \function{locale.setlocale()} is called.
79 \end{datadesc}
81 \begin{datadesc}{whitespace}
82 A string containing all characters that are considered whitespace.
83 On most systems this includes the characters space, tab, linefeed,
84 return, formfeed, and vertical tab. Do not change its definition ---
85 the effect on the routines \function{strip()} and \function{split()}
86 is undefined.
87 \end{datadesc}
90 Many of the functions provided by this module are also defined as
91 methods of string and Unicode objects; see ``String Methods''
92 (section~\ref{string-methods}) for more information on those.
93 The functions defined in this module are:
95 \begin{funcdesc}{atof}{s}
96 \deprecated{2.0}{Use the \function{float()} built-in function.}
97 Convert a string to a floating point number. The string must have
98 the standard syntax for a floating point literal in Python,
99 optionally preceded by a sign (\samp{+} or \samp{-}). Note that
100 this behaves identically to the built-in function
101 \function{float()}\bifuncindex{float} when passed a string.
103 \note{When passing in a string, values for NaN\index{NaN}
104 and Infinity\index{Infinity} may be returned, depending on the
105 underlying C library. The specific set of strings accepted which
106 cause these values to be returned depends entirely on the C library
107 and is known to vary.}
108 \end{funcdesc}
110 \begin{funcdesc}{atoi}{s\optional{, base}}
111 \deprecated{2.0}{Use the \function{int()} built-in function.}
112 Convert string \var{s} to an integer in the given \var{base}. The
113 string must consist of one or more digits, optionally preceded by a
114 sign (\samp{+} or \samp{-}). The \var{base} defaults to 10. If it
115 is 0, a default base is chosen depending on the leading characters
116 of the string (after stripping the sign): \samp{0x} or \samp{0X}
117 means 16, \samp{0} means 8, anything else means 10. If \var{base}
118 is 16, a leading \samp{0x} or \samp{0X} is always accepted, though
119 not required. This behaves identically to the built-in function
120 \function{int()} when passed a string. (Also note: for a more
121 flexible interpretation of numeric literals, use the built-in
122 function \function{eval()}\bifuncindex{eval}.)
123 \end{funcdesc}
125 \begin{funcdesc}{atol}{s\optional{, base}}
126 \deprecated{2.0}{Use the \function{long()} built-in function.}
127 Convert string \var{s} to a long integer in the given \var{base}.
128 The string must consist of one or more digits, optionally preceded
129 by a sign (\samp{+} or \samp{-}). The \var{base} argument has the
130 same meaning as for \function{atoi()}. A trailing \samp{l} or
131 \samp{L} is not allowed, except if the base is 0. Note that when
132 invoked without \var{base} or with \var{base} set to 10, this
133 behaves identically to the built-in function
134 \function{long()}\bifuncindex{long} when passed a string.
135 \end{funcdesc}
137 \begin{funcdesc}{capitalize}{word}
138 Return a copy of \var{word} with only its first character capitalized.
139 \end{funcdesc}
141 \begin{funcdesc}{capwords}{s}
142 Split the argument into words using \function{split()}, capitalize
143 each word using \function{capitalize()}, and join the capitalized
144 words using \function{join()}. Note that this replaces runs of
145 whitespace characters by a single space, and removes leading and
146 trailing whitespace.
147 \end{funcdesc}
149 \begin{funcdesc}{expandtabs}{s\optional{, tabsize}}
150 Expand tabs in a string, i.e.\ replace them by one or more spaces,
151 depending on the current column and the given tab size. The column
152 number is reset to zero after each newline occurring in the string.
153 This doesn't understand other non-printing characters or escape
154 sequences. The tab size defaults to 8.
155 \end{funcdesc}
157 \begin{funcdesc}{find}{s, sub\optional{, start\optional{, end}}}
158 Return the lowest index in \var{s} where the substring \var{sub} is
159 found such that \var{sub} is wholly contained in
160 \code{\var{s}[\var{start}:\var{end}]}. Return \code{-1} on failure.
161 Defaults for \var{start} and \var{end} and interpretation of
162 negative values are the same as for slices.
163 \end{funcdesc}
165 \begin{funcdesc}{rfind}{s, sub\optional{, start\optional{, end}}}
166 Like \function{find()} but find the highest index.
167 \end{funcdesc}
169 \begin{funcdesc}{index}{s, sub\optional{, start\optional{, end}}}
170 Like \function{find()} but raise \exception{ValueError} when the
171 substring is not found.
172 \end{funcdesc}
174 \begin{funcdesc}{rindex}{s, sub\optional{, start\optional{, end}}}
175 Like \function{rfind()} but raise \exception{ValueError} when the
176 substring is not found.
177 \end{funcdesc}
179 \begin{funcdesc}{count}{s, sub\optional{, start\optional{, end}}}
180 Return the number of (non-overlapping) occurrences of substring
181 \var{sub} in string \code{\var{s}[\var{start}:\var{end}]}.
182 Defaults for \var{start} and \var{end} and interpretation of
183 negative values are the same as for slices.
184 \end{funcdesc}
186 \begin{funcdesc}{lower}{s}
187 Return a copy of \var{s}, but with upper case letters converted to
188 lower case.
189 \end{funcdesc}
191 \begin{funcdesc}{maketrans}{from, to}
192 Return a translation table suitable for passing to
193 \function{translate()} or \function{regex.compile()}, that will map
194 each character in \var{from} into the character at the same position
195 in \var{to}; \var{from} and \var{to} must have the same length.
197 \warning{Don't use strings derived from \constant{lowercase}
198 and \constant{uppercase} as arguments; in some locales, these don't have
199 the same length. For case conversions, always use
200 \function{lower()} and \function{upper()}.}
201 \end{funcdesc}
203 \begin{funcdesc}{split}{s\optional{, sep\optional{, maxsplit}}}
204 Return a list of the words of the string \var{s}. If the optional
205 second argument \var{sep} is absent or \code{None}, the words are
206 separated by arbitrary strings of whitespace characters (space, tab,
207 newline, return, formfeed). If the second argument \var{sep} is
208 present and not \code{None}, it specifies a string to be used as the
209 word separator. The returned list will then have one more item
210 than the number of non-overlapping occurrences of the separator in
211 the string. The optional third argument \var{maxsplit} defaults to
212 0. If it is nonzero, at most \var{maxsplit} number of splits occur,
213 and the remainder of the string is returned as the final element of
214 the list (thus, the list will have at most \code{\var{maxsplit}+1}
215 elements).
216 \end{funcdesc}
218 \begin{funcdesc}{splitfields}{s\optional{, sep\optional{, maxsplit}}}
219 This function behaves identically to \function{split()}. (In the
220 past, \function{split()} was only used with one argument, while
221 \function{splitfields()} was only used with two arguments.)
222 \end{funcdesc}
224 \begin{funcdesc}{join}{words\optional{, sep}}
225 Concatenate a list or tuple of words with intervening occurrences of
226 \var{sep}. The default value for \var{sep} is a single space
227 character. It is always true that
228 \samp{string.join(string.split(\var{s}, \var{sep}), \var{sep})}
229 equals \var{s}.
230 \end{funcdesc}
232 \begin{funcdesc}{joinfields}{words\optional{, sep}}
233 This function behaves identically to \function{join()}. (In the past,
234 \function{join()} was only used with one argument, while
235 \function{joinfields()} was only used with two arguments.)
236 Note that there is no \method{joinfields()} method on string
237 objects; use the \method{join()} method instead.
238 \end{funcdesc}
240 \begin{funcdesc}{lstrip}{s\optional{, chars}}
241 Return a copy of the string with leading characters removed. If
242 \var{chars} is omitted or \code{None}, whitespace characters are
243 removed. If given and not \code{None}, \var{chars} must be a string;
244 the characters in the string will be stripped from the beginning of
245 \var{s}.
246 \versionchanged[The \var{chars} parameter was added. The \var{chars}
247 parameter cannot be passed in earlier 2.2 versions]{2.2.3}
248 \end{funcdesc}
250 \begin{funcdesc}{rstrip}{s\optional{, chars}}
251 Return a copy of the string with trailing characters removed. If
252 \var{chars} is omitted or \code{None}, whitespace characters are
253 removed. If given and not \code{None}, \var{chars} must be a string;
254 the characters in the string will be stripped from the end of
255 \var{s}.
256 \versionchanged[The \var{chars} parameter was added. The \var{chars}
257 parameter cannot be passed in earlier 2.2 versions]{2.2.3}
258 \end{funcdesc}
260 \begin{funcdesc}{strip}{s\optional{, chars}}
261 Return a copy of the string with leading and trailing characters
262 removed. If \var{chars} is omitted or \code{None}, whitespace
263 characters are removed. If given and not \code{None}, \var{chars}
264 must be a string; the characters in the string will be stripped from
265 both ends of \var{s}.
266 \versionchanged[The \var{chars} parameter was added. The \var{chars}
267 parameter cannot be passed in earlier 2.2 versions]{2.2.3}
268 \end{funcdesc}
270 \begin{funcdesc}{swapcase}{s}
271 Return a copy of \var{s}, but with lower case letters
272 converted to upper case and vice versa.
273 \end{funcdesc}
275 \begin{funcdesc}{translate}{s, table\optional{, deletechars}}
276 Delete all characters from \var{s} that are in \var{deletechars} (if
277 present), and then translate the characters using \var{table}, which
278 must be a 256-character string giving the translation for each
279 character value, indexed by its ordinal.
280 \end{funcdesc}
282 \begin{funcdesc}{upper}{s}
283 Return a copy of \var{s}, but with lower case letters converted to
284 upper case.
285 \end{funcdesc}
287 \begin{funcdesc}{ljust}{s, width}
288 \funcline{rjust}{s, width}
289 \funcline{center}{s, width}
290 These functions respectively left-justify, right-justify and center
291 a string in a field of given width. They return a string that is at
292 least \var{width} characters wide, created by padding the string
293 \var{s} with spaces on the right, left or both sides until the given
294 width is reached. The string is never truncated.
295 \end{funcdesc}
297 \begin{funcdesc}{zfill}{s, width}
298 Pad a numeric string on the left with zero digits until the given
299 width is reached. Strings starting with a sign are handled
300 correctly.
301 \end{funcdesc}
303 \begin{funcdesc}{replace}{str, old, new\optional{, maxreplace}}
304 Return a copy of string \var{str} with all occurrences of substring
305 \var{old} replaced by \var{new}. If the optional argument
306 \var{maxreplace} is given, the first \var{maxreplace} occurrences are
307 replaced.
308 \end{funcdesc}