The 0.5 release happened on 2/15, not on 2/14. :-)
[python/dscho.git] / Doc / lib / libstring.tex
blob32b7c27b0028e0e238323e77ac1ccb059c6b632e
1 \section{\module{string} ---
2 Common string operations}
4 \declaremodule{standard}{string}
5 \modulesynopsis{Common string operations.}
8 This module defines some constants useful for checking character
9 classes and some useful string functions. See the module
10 \refmodule{re}\refstmodindex{re} for string functions based on regular
11 expressions.
13 The constants defined in this module are:
15 \begin{datadesc}{digits}
16 The string \code{'0123456789'}.
17 \end{datadesc}
19 \begin{datadesc}{hexdigits}
20 The string \code{'0123456789abcdefABCDEF'}.
21 \end{datadesc}
23 \begin{datadesc}{letters}
24 The concatenation of the strings \code{lowercase} and
25 \code{uppercase} described below.
26 \end{datadesc}
28 \begin{datadesc}{lowercase}
29 A string containing all the characters that are considered lowercase
30 letters. On most systems this is the string
31 \code{'abcdefghijklmnopqrstuvwxyz'}. Do not change its definition ---
32 the effect on the routines \function{upper()} and
33 \function{swapcase()} is undefined.
34 \end{datadesc}
36 \begin{datadesc}{octdigits}
37 The string \code{'01234567'}.
38 \end{datadesc}
40 \begin{datadesc}{uppercase}
41 A string containing all the characters that are considered uppercase
42 letters. On most systems this is the string
43 \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}. Do not change its definition ---
44 the effect on the routines \function{lower()} and
45 \function{swapcase()} is undefined.
46 \end{datadesc}
48 \begin{datadesc}{whitespace}
49 A string containing all characters that are considered whitespace.
50 On most systems this includes the characters space, tab, linefeed,
51 return, formfeed, and vertical tab. Do not change its definition ---
52 the effect on the routines \function{strip()} and \function{split()}
53 is undefined.
54 \end{datadesc}
56 The functions defined in this module are:
59 \begin{funcdesc}{atof}{s}
60 Convert a string to a floating point number. The string must have
61 the standard syntax for a floating point literal in Python,
62 optionally preceded by a sign (\samp{+} or \samp{-}). Note that
63 this behaves identically to the built-in function
64 \function{float()}\bifuncindex{float} when passed a string.
66 \strong{Note:} When passing in a string, values for NaN\index{NaN}
67 and Infinity\index{Infinity} may be returned, depending on the
68 underlying C library. The specific set of strings accepted which
69 cause these values to be returned depends entirely on the C library
70 and is known to vary.
71 \end{funcdesc}
73 \begin{funcdesc}{atoi}{s\optional{, base}}
74 Convert string \var{s} to an integer in the given \var{base}. The
75 string must consist of one or more digits, optionally preceded by a
76 sign (\samp{+} or \samp{-}). The \var{base} defaults to 10. If it
77 is 0, a default base is chosen depending on the leading characters
78 of the string (after stripping the sign): \samp{0x} or \samp{0X}
79 means 16, \samp{0} means 8, anything else means 10. If \var{base}
80 is 16, a leading \samp{0x} or \samp{0X} is always accepted. Note
81 that when invoked without \var{base} or with \var{base} set to 10,
82 this behaves identically to the built-in function \function{int()}
83 when passed a string. (Also note: for a more flexible
84 interpretation of numeric literals, use the built-in function
85 \function{eval()}\bifuncindex{eval}.)
86 \end{funcdesc}
88 \begin{funcdesc}{atol}{s\optional{, base}}
89 Convert string \var{s} to a long integer in the given \var{base}.
90 The string must consist of one or more digits, optionally preceded
91 by a sign (\samp{+} or \samp{-}). The \var{base} argument has the
92 same meaning as for \function{atoi()}. A trailing \samp{l} or
93 \samp{L} is not allowed, except if the base is 0. Note that when
94 invoked without \var{base} or with \var{base} set to 10, this
95 behaves identically to the built-in function
96 \function{long()}\bifuncindex{long} when passed a string.
97 \end{funcdesc}
99 \begin{funcdesc}{capitalize}{word}
100 Capitalize the first character of the argument.
101 \end{funcdesc}
103 \begin{funcdesc}{capwords}{s}
104 Split the argument into words using \function{split()}, capitalize
105 each word using \function{capitalize()}, and join the capitalized
106 words using \function{join()}. Note that this replaces runs of
107 whitespace characters by a single space, and removes leading and
108 trailing whitespace.
109 \end{funcdesc}
111 \begin{funcdesc}{expandtabs}{s\optional{, tabsize}}
112 Expand tabs in a string, i.e.\ replace them by one or more spaces,
113 depending on the current column and the given tab size. The column
114 number is reset to zero after each newline occurring in the string.
115 This doesn't understand other non-printing characters or escape
116 sequences. The tab size defaults to 8.
117 \end{funcdesc}
119 \begin{funcdesc}{find}{s, sub\optional{, start\optional{, end}}}
120 Return the lowest index in \var{s} where the substring \var{sub} is
121 found such that \var{sub} is wholly contained in
122 \code{\var{s}[\var{start}:\var{end}]}. Return \code{-1} on failure.
123 Defaults for \var{start} and \var{end} and interpretation of
124 negative values are the same as for slices.
125 \end{funcdesc}
127 \begin{funcdesc}{rfind}{s, sub\optional{, start\optional{, end}}}
128 Like \function{find()} but find the highest index.
129 \end{funcdesc}
131 \begin{funcdesc}{index}{s, sub\optional{, start\optional{, end}}}
132 Like \function{find()} but raise \exception{ValueError} when the
133 substring is not found.
134 \end{funcdesc}
136 \begin{funcdesc}{rindex}{s, sub\optional{, start\optional{, end}}}
137 Like \function{rfind()} but raise \exception{ValueError} when the
138 substring is not found.
139 \end{funcdesc}
141 \begin{funcdesc}{count}{s, sub\optional{, start\optional{, end}}}
142 Return the number of (non-overlapping) occurrences of substring
143 \var{sub} in string \code{\var{s}[\var{start}:\var{end}]}.
144 Defaults for \var{start} and \var{end} and interpretation of
145 negative values are the same as for slices.
146 \end{funcdesc}
148 \begin{funcdesc}{lower}{s}
149 Return a copy of \var{s}, but with upper case letters converted to
150 lower case.
151 \end{funcdesc}
153 \begin{funcdesc}{maketrans}{from, to}
154 Return a translation table suitable for passing to
155 \function{translate()} or \function{regex.compile()}, that will map
156 each character in \var{from} into the character at the same position
157 in \var{to}; \var{from} and \var{to} must have the same length.
159 \strong{Warning:} don't use strings derived from \code{lowercase}
160 and \code{uppercase} as arguments; in some locales, these don't have
161 the same length. For case conversions, always use
162 \function{lower()} and \function{upper()}.
163 \end{funcdesc}
165 \begin{funcdesc}{split}{s\optional{, sep\optional{, maxsplit}}}
166 Return a list of the words of the string \var{s}. If the optional
167 second argument \var{sep} is absent or \code{None}, the words are
168 separated by arbitrary strings of whitespace characters (space, tab,
169 newline, return, formfeed). If the second argument \var{sep} is
170 present and not \code{None}, it specifies a string to be used as the
171 word separator. The returned list will then have one more item
172 than the number of non-overlapping occurrences of the separator in
173 the string. The optional third argument \var{maxsplit} defaults to
174 0. If it is nonzero, at most \var{maxsplit} number of splits occur,
175 and the remainder of the string is returned as the final element of
176 the list (thus, the list will have at most \code{\var{maxsplit}+1}
177 elements).
178 \end{funcdesc}
180 \begin{funcdesc}{splitfields}{s\optional{, sep\optional{, maxsplit}}}
181 This function behaves identically to \function{split()}. (In the
182 past, \function{split()} was only used with one argument, while
183 \function{splitfields()} was only used with two arguments.)
184 \end{funcdesc}
186 \begin{funcdesc}{join}{words\optional{, sep}}
187 Concatenate a list or tuple of words with intervening occurrences of
188 \var{sep}. The default value for \var{sep} is a single space
189 character. It is always true that
190 \samp{string.join(string.split(\var{s}, \var{sep}), \var{sep})}
191 equals \var{s}.
192 \end{funcdesc}
194 \begin{funcdesc}{joinfields}{words\optional{, sep}}
195 This function behaves identically to \function{join()}. (In the past,
196 \function{join()} was only used with one argument, while
197 \function{joinfields()} was only used with two arguments.)
198 \end{funcdesc}
200 \begin{funcdesc}{lstrip}{s}
201 Return a copy of \var{s} but without leading whitespace characters.
202 \end{funcdesc}
204 \begin{funcdesc}{rstrip}{s}
205 Return a copy of \var{s} but without trailing whitespace
206 characters.
207 \end{funcdesc}
209 \begin{funcdesc}{strip}{s}
210 Return a copy of \var{s} without leading or trailing whitespace.
211 \end{funcdesc}
213 \begin{funcdesc}{swapcase}{s}
214 Return a copy of \var{s}, but with lower case letters
215 converted to upper case and vice versa.
216 \end{funcdesc}
218 \begin{funcdesc}{translate}{s, table\optional{, deletechars}}
219 Delete all characters from \var{s} that are in \var{deletechars} (if
220 present), and then translate the characters using \var{table}, which
221 must be a 256-character string giving the translation for each
222 character value, indexed by its ordinal.
223 \end{funcdesc}
225 \begin{funcdesc}{upper}{s}
226 Return a copy of \var{s}, but with lower case letters converted to
227 upper case.
228 \end{funcdesc}
230 \begin{funcdesc}{ljust}{s, width}
231 \funcline{rjust}{s, width}
232 \funcline{center}{s, width}
233 These functions respectively left-justify, right-justify and center
234 a string in a field of given width. They return a string that is at
235 least \var{width} characters wide, created by padding the string
236 \var{s} with spaces until the given width on the right, left or both
237 sides. The string is never truncated.
238 \end{funcdesc}
240 \begin{funcdesc}{zfill}{s, width}
241 Pad a numeric string on the left with zero digits until the given
242 width is reached. Strings starting with a sign are handled
243 correctly.
244 \end{funcdesc}
246 \begin{funcdesc}{replace}{str, old, new\optional{, maxsplit}}
247 Return a copy of string \var{str} with all occurrences of substring
248 \var{old} replaced by \var{new}. If the optional argument
249 \var{maxsplit} is given, the first \var{maxsplit} occurrences are
250 replaced.
251 \end{funcdesc}
253 This module is implemented in Python. Much of its functionality has
254 been reimplemented in the built-in module
255 \module{strop}\refbimodindex{strop}. However, you
256 should \emph{never} import the latter module directly. When
257 \module{string} discovers that \module{strop} exists, it transparently
258 replaces parts of itself with the implementation from \module{strop}.
259 After initialization, there is \emph{no} overhead in using
260 \module{string} instead of \module{strop}.