Updated for 2.1a3
[python/dscho.git] / Doc / lib / libstring.tex
blobe95741eb8c31924359b1e3bb43f0a99908fc7b1a
1 \section{\module{string} ---
2 Common string operations}
4 \declaremodule{standard}{string}
5 \modulesynopsis{Common string operations.}
8 This module defines some constants useful for checking character
9 classes and some useful string functions. See the module
10 \refmodule{re}\refstmodindex{re} for string functions based on regular
11 expressions.
13 The constants defined in this module are:
15 \begin{datadesc}{digits}
16 The string \code{'0123456789'}.
17 \end{datadesc}
19 \begin{datadesc}{hexdigits}
20 The string \code{'0123456789abcdefABCDEF'}.
21 \end{datadesc}
23 \begin{datadesc}{letters}
24 The concatenation of the strings \constant{lowercase} and
25 \constant{uppercase} described below.
26 \end{datadesc}
28 \begin{datadesc}{lowercase}
29 A string containing all the characters that are considered lowercase
30 letters. On most systems this is the string
31 \code{'abcdefghijklmnopqrstuvwxyz'}. Do not change its definition ---
32 the effect on the routines \function{upper()} and
33 \function{swapcase()} is undefined.
34 \end{datadesc}
36 \begin{datadesc}{octdigits}
37 The string \code{'01234567'}.
38 \end{datadesc}
40 \begin{datadesc}{punctuation}
41 String of \ASCII{} characters which are considered punctuation
42 characters in the \samp{C} locale.
43 \end{datadesc}
45 \begin{datadesc}{printable}
46 String of characters which are considered printable. This is a
47 combination of \constant{digits}, \constant{letters},
48 \constant{punctuation}, and \constant{whitespace}.
49 \end{datadesc}
51 \begin{datadesc}{uppercase}
52 A string containing all the characters that are considered uppercase
53 letters. On most systems this is the string
54 \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}. Do not change its definition ---
55 the effect on the routines \function{lower()} and
56 \function{swapcase()} is undefined.
57 \end{datadesc}
59 \begin{datadesc}{whitespace}
60 A string containing all characters that are considered whitespace.
61 On most systems this includes the characters space, tab, linefeed,
62 return, formfeed, and vertical tab. Do not change its definition ---
63 the effect on the routines \function{strip()} and \function{split()}
64 is undefined.
65 \end{datadesc}
68 Many of the functions provided by this module are also defined as
69 methods of string and Unicode objects; see ``String Methods''
70 (section~\ref{string-methods}) for more information on those.
71 The functions defined in this module are:
73 \begin{funcdesc}{atof}{s}
74 \deprecated{2.0}{Use the \function{float()} built-in function.}
75 Convert a string to a floating point number. The string must have
76 the standard syntax for a floating point literal in Python,
77 optionally preceded by a sign (\samp{+} or \samp{-}). Note that
78 this behaves identically to the built-in function
79 \function{float()}\bifuncindex{float} when passed a string.
81 \strong{Note:} When passing in a string, values for NaN\index{NaN}
82 and Infinity\index{Infinity} may be returned, depending on the
83 underlying C library. The specific set of strings accepted which
84 cause these values to be returned depends entirely on the C library
85 and is known to vary.
86 \end{funcdesc}
88 \begin{funcdesc}{atoi}{s\optional{, base}}
89 \deprecated{2.0}{Use the \function{int()} built-in function.}
90 Convert string \var{s} to an integer in the given \var{base}. The
91 string must consist of one or more digits, optionally preceded by a
92 sign (\samp{+} or \samp{-}). The \var{base} defaults to 10. If it
93 is 0, a default base is chosen depending on the leading characters
94 of the string (after stripping the sign): \samp{0x} or \samp{0X}
95 means 16, \samp{0} means 8, anything else means 10. If \var{base}
96 is 16, a leading \samp{0x} or \samp{0X} is always accepted, though
97 not required. This behaves identically to the built-in function
98 \function{int()} when passed a string. (Also note: for a more
99 flexible interpretation of numeric literals, use the built-in
100 function \function{eval()}\bifuncindex{eval}.)
101 \end{funcdesc}
103 \begin{funcdesc}{atol}{s\optional{, base}}
104 \deprecated{2.0}{Use the \function{long()} built-in function.}
105 Convert string \var{s} to a long integer in the given \var{base}.
106 The string must consist of one or more digits, optionally preceded
107 by a sign (\samp{+} or \samp{-}). The \var{base} argument has the
108 same meaning as for \function{atoi()}. A trailing \samp{l} or
109 \samp{L} is not allowed, except if the base is 0. Note that when
110 invoked without \var{base} or with \var{base} set to 10, this
111 behaves identically to the built-in function
112 \function{long()}\bifuncindex{long} when passed a string.
113 \end{funcdesc}
115 \begin{funcdesc}{capitalize}{word}
116 Capitalize the first character of the argument.
117 \end{funcdesc}
119 \begin{funcdesc}{capwords}{s}
120 Split the argument into words using \function{split()}, capitalize
121 each word using \function{capitalize()}, and join the capitalized
122 words using \function{join()}. Note that this replaces runs of
123 whitespace characters by a single space, and removes leading and
124 trailing whitespace.
125 \end{funcdesc}
127 \begin{funcdesc}{expandtabs}{s\optional{, tabsize}}
128 Expand tabs in a string, i.e.\ replace them by one or more spaces,
129 depending on the current column and the given tab size. The column
130 number is reset to zero after each newline occurring in the string.
131 This doesn't understand other non-printing characters or escape
132 sequences. The tab size defaults to 8.
133 \end{funcdesc}
135 \begin{funcdesc}{find}{s, sub\optional{, start\optional{, end}}}
136 Return the lowest index in \var{s} where the substring \var{sub} is
137 found such that \var{sub} is wholly contained in
138 \code{\var{s}[\var{start}:\var{end}]}. Return \code{-1} on failure.
139 Defaults for \var{start} and \var{end} and interpretation of
140 negative values are the same as for slices.
141 \end{funcdesc}
143 \begin{funcdesc}{rfind}{s, sub\optional{, start\optional{, end}}}
144 Like \function{find()} but find the highest index.
145 \end{funcdesc}
147 \begin{funcdesc}{index}{s, sub\optional{, start\optional{, end}}}
148 Like \function{find()} but raise \exception{ValueError} when the
149 substring is not found.
150 \end{funcdesc}
152 \begin{funcdesc}{rindex}{s, sub\optional{, start\optional{, end}}}
153 Like \function{rfind()} but raise \exception{ValueError} when the
154 substring is not found.
155 \end{funcdesc}
157 \begin{funcdesc}{count}{s, sub\optional{, start\optional{, end}}}
158 Return the number of (non-overlapping) occurrences of substring
159 \var{sub} in string \code{\var{s}[\var{start}:\var{end}]}.
160 Defaults for \var{start} and \var{end} and interpretation of
161 negative values are the same as for slices.
162 \end{funcdesc}
164 \begin{funcdesc}{lower}{s}
165 Return a copy of \var{s}, but with upper case letters converted to
166 lower case.
167 \end{funcdesc}
169 \begin{funcdesc}{maketrans}{from, to}
170 Return a translation table suitable for passing to
171 \function{translate()} or \function{regex.compile()}, that will map
172 each character in \var{from} into the character at the same position
173 in \var{to}; \var{from} and \var{to} must have the same length.
175 \strong{Warning:} don't use strings derived from \constant{lowercase}
176 and \constant{uppercase} as arguments; in some locales, these don't have
177 the same length. For case conversions, always use
178 \function{lower()} and \function{upper()}.
179 \end{funcdesc}
181 \begin{funcdesc}{split}{s\optional{, sep\optional{, maxsplit}}}
182 Return a list of the words of the string \var{s}. If the optional
183 second argument \var{sep} is absent or \code{None}, the words are
184 separated by arbitrary strings of whitespace characters (space, tab,
185 newline, return, formfeed). If the second argument \var{sep} is
186 present and not \code{None}, it specifies a string to be used as the
187 word separator. The returned list will then have one more item
188 than the number of non-overlapping occurrences of the separator in
189 the string. The optional third argument \var{maxsplit} defaults to
190 0. If it is nonzero, at most \var{maxsplit} splits occur,
191 and the remainder of the string is returned as the final element of
192 the list (thus, the list will have at most \code{\var{maxsplit}+1}
193 elements).
194 \end{funcdesc}
196 \begin{funcdesc}{splitfields}{s\optional{, sep\optional{, maxsplit}}}
197 This function behaves identically to \function{split()}. (In the
198 past, \function{split()} was only used with one argument, while
199 \function{splitfields()} was only used with two arguments.)
200 \end{funcdesc}
202 \begin{funcdesc}{join}{words\optional{, sep}}
203 Concatenate a list or tuple of words with intervening occurrences of
204 \var{sep}. The default value for \var{sep} is a single space
205 character. It is always true that
206 \samp{string.join(string.split(\var{s}, \var{sep}), \var{sep})}
207 equals \var{s}.
208 \end{funcdesc}
210 \begin{funcdesc}{joinfields}{words\optional{, sep}}
211 This function behaves identically to \function{join()}. (In the past,
212 \function{join()} was only used with one argument, while
213 \function{joinfields()} was only used with two arguments.)
214 \end{funcdesc}
216 \begin{funcdesc}{lstrip}{s}
217 Return a copy of \var{s} but without leading whitespace characters.
218 \end{funcdesc}
220 \begin{funcdesc}{rstrip}{s}
221 Return a copy of \var{s} but without trailing whitespace
222 characters.
223 \end{funcdesc}
225 \begin{funcdesc}{strip}{s}
226 Return a copy of \var{s} without leading or trailing whitespace.
227 \end{funcdesc}
229 \begin{funcdesc}{swapcase}{s}
230 Return a copy of \var{s}, but with lower case letters
231 converted to upper case and vice versa.
232 \end{funcdesc}
234 \begin{funcdesc}{translate}{s, table\optional{, deletechars}}
235 Delete all characters from \var{s} that are in \var{deletechars} (if
236 present), and then translate the characters using \var{table}, which
237 must be a 256-character string giving the translation for each
238 character value, indexed by its ordinal.
239 \end{funcdesc}
241 \begin{funcdesc}{upper}{s}
242 Return a copy of \var{s}, but with lower case letters converted to
243 upper case.
244 \end{funcdesc}
246 \begin{funcdesc}{ljust}{s, width}
247 \funcline{rjust}{s, width}
248 \funcline{center}{s, width}
249 These functions respectively left-justify, right-justify and center
250 a string in a field of given width. They return a string that is at
251 least \var{width} characters wide, created by padding the string
252 \var{s} with spaces until the given width on the right, left or both
253 sides. The string is never truncated.
254 \end{funcdesc}
256 \begin{funcdesc}{zfill}{s, width}
257 Pad a numeric string on the left with zero digits until the given
258 width is reached. Strings starting with a sign are handled
259 correctly.
260 \end{funcdesc}
262 \begin{funcdesc}{replace}{str, old, new\optional{, maxsplit}}
263 Return a copy of string \var{str} with all occurrences of substring
264 \var{old} replaced by \var{new}. If the optional argument
265 \var{maxsplit} is given, the first \var{maxsplit} occurrences are
266 replaced.
267 \end{funcdesc}
269 This module is implemented in Python. Much of its functionality has
270 been reimplemented in the built-in module
271 \module{strop}\refbimodindex{strop}. However, you
272 should \emph{never} import the latter module directly. When
273 \module{string} discovers that \module{strop} exists, it transparently
274 replaces parts of itself with the implementation from \module{strop}.
275 After initialization, there is \emph{no} overhead in using
276 \module{string} instead of \module{strop}.