Doc/lib/libstring.tex

   1 \section{\module{string} ---
   2          Common string operations}
   3
   4 \declaremodule{standard}{string}
   5 \modulesynopsis{Common string operations.}
   6
   7
   8 This module defines some constants useful for checking character
   9 classes and some useful string functions.  See the module
  10 \refmodule{re}\refstmodindex{re} for string functions based on regular
  11 expressions.
  12
  13 The constants defined in this module are:
  14
  15 \begin{datadesc}{ascii_letters}
  16   The concatenation of the \constant{ascii_lowercase} and
  17   \constant{ascii_uppercase} constants described below.  This value is
  18   not locale-dependent.
  19 \end{datadesc}
  20
  21 \begin{datadesc}{ascii_lowercase}
  22   The lowercase letters \code{'abcdefghijklmnopqrstuvwxyz'}.  This
  23   value is not locale-dependent and will not change.
  24 \end{datadesc}
  25
  26 \begin{datadesc}{ascii_uppercase}
  27   The uppercase letters \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}.  This
  28   value is not locale-dependent and will not change.
  29 \end{datadesc}
  30
  31 \begin{datadesc}{digits}
  32   The string \code{'0123456789'}.
  33 \end{datadesc}
  34
  35 \begin{datadesc}{hexdigits}
  36   The string \code{'0123456789abcdefABCDEF'}.
  37 \end{datadesc}
  38
  39 \begin{datadesc}{letters}
  40   The concatenation of the strings \constant{lowercase} and
  41   \constant{uppercase} described below.  The specific value is
  42   locale-dependent, and will be updated when
  43   \function{locale.setlocale()} is called.
  44 \end{datadesc}
  45
  46 \begin{datadesc}{lowercase}
  47   A string containing all the characters that are considered lowercase
  48   letters.  On most systems this is the string
  49   \code{'abcdefghijklmnopqrstuvwxyz'}.  Do not change its definition ---
  50   the effect on the routines \function{upper()} and
  51   \function{swapcase()} is undefined.  The specific value is
  52   locale-dependent, and will be updated when
  53   \function{locale.setlocale()} is called.
  54 \end{datadesc}
  55
  56 \begin{datadesc}{octdigits}
  57   The string \code{'01234567'}.
  58 \end{datadesc}
  59
  60 \begin{datadesc}{punctuation}
  61   String of \ASCII{} characters which are considered punctuation
  62   characters in the \samp{C} locale.
  63 \end{datadesc}
  64
  65 \begin{datadesc}{printable}
  66   String of characters which are considered printable.  This is a
  67   combination of \constant{digits}, \constant{letters},
  68   \constant{punctuation}, and \constant{whitespace}.
  69 \end{datadesc}
  70
  71 \begin{datadesc}{uppercase}
  72   A string containing all the characters that are considered uppercase
  73   letters.  On most systems this is the string
  74   \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}.  Do not change its definition ---
  75   the effect on the routines \function{lower()} and
  76   \function{swapcase()} is undefined.  The specific value is
  77   locale-dependent, and will be updated when
  78   \function{locale.setlocale()} is called.
  79 \end{datadesc}
  80
  81 \begin{datadesc}{whitespace}
  82   A string containing all characters that are considered whitespace.
  83   On most systems this includes the characters space, tab, linefeed,
  84   return, formfeed, and vertical tab.  Do not change its definition ---
  85   the effect on the routines \function{strip()} and \function{split()}
  86   is undefined.
  87 \end{datadesc}
  88
  89
  90 Many of the functions provided by this module are also defined as
  91 methods of string and Unicode objects; see ``String Methods''
  92 (section~\ref{string-methods}) for more information on those.
  93 The functions defined in this module are:
  94
  95 \begin{funcdesc}{atof}{s}
  96   \deprecated{2.0}{Use the \function{float()} built-in function.}
  97   Convert a string to a floating point number.  The string must have
  98   the standard syntax for a floating point literal in Python,
  99   optionally preceded by a sign (\samp{+} or \samp{-}).  Note that
 100   this behaves identically to the built-in function
 101   \function{float()}\bifuncindex{float} when passed a string.
 102
 103   \note{When passing in a string, values for NaN\index{NaN}
 104   and Infinity\index{Infinity} may be returned, depending on the
 105   underlying C library.  The specific set of strings accepted which
 106   cause these values to be returned depends entirely on the C library
 107   and is known to vary.}
 108 \end{funcdesc}
 109
 110 \begin{funcdesc}{atoi}{s\optional{, base}}
 111   \deprecated{2.0}{Use the \function{int()} built-in function.}
 112   Convert string \var{s} to an integer in the given \var{base}.  The
 113   string must consist of one or more digits, optionally preceded by a
 114   sign (\samp{+} or \samp{-}).  The \var{base} defaults to 10.  If it
 115   is 0, a default base is chosen depending on the leading characters
 116   of the string (after stripping the sign): \samp{0x} or \samp{0X}
 117   means 16, \samp{0} means 8, anything else means 10.  If \var{base}
 118   is 16, a leading \samp{0x} or \samp{0X} is always accepted, though
 119   not required.  This behaves identically to the built-in function
 120   \function{int()} when passed a string.  (Also note: for a more
 121   flexible interpretation of numeric literals, use the built-in
 122   function \function{eval()}\bifuncindex{eval}.)
 123 \end{funcdesc}
 124
 125 \begin{funcdesc}{atol}{s\optional{, base}}
 126   \deprecated{2.0}{Use the \function{long()} built-in function.}
 127   Convert string \var{s} to a long integer in the given \var{base}.
 128   The string must consist of one or more digits, optionally preceded
 129   by a sign (\samp{+} or \samp{-}).  The \var{base} argument has the
 130   same meaning as for \function{atoi()}.  A trailing \samp{l} or
 131   \samp{L} is not allowed, except if the base is 0.  Note that when
 132   invoked without \var{base} or with \var{base} set to 10, this
 133   behaves identically to the built-in function
 134   \function{long()}\bifuncindex{long} when passed a string.
 135 \end{funcdesc}
 136
 137 \begin{funcdesc}{capitalize}{word}
 138   Return a copy of \var{word} with only its first character capitalized.
 139 \end{funcdesc}
 140
 141 \begin{funcdesc}{capwords}{s}
 142   Split the argument into words using \function{split()}, capitalize
 143   each word using \function{capitalize()}, and join the capitalized
 144   words using \function{join()}.  Note that this replaces runs of
 145   whitespace characters by a single space, and removes leading and
 146   trailing whitespace.
 147 \end{funcdesc}
 148
 149 \begin{funcdesc}{expandtabs}{s\optional{, tabsize}}
 150   Expand tabs in a string, i.e.\ replace them by one or more spaces,
 151   depending on the current column and the given tab size.  The column
 152   number is reset to zero after each newline occurring in the string.
 153   This doesn't understand other non-printing characters or escape
 154   sequences.  The tab size defaults to 8.
 155 \end{funcdesc}
 156
 157 \begin{funcdesc}{find}{s, sub\optional{, start\optional{, end}}}
 158   Return the lowest index in \var{s} where the substring \var{sub} is
 159   found such that \var{sub} is wholly contained in
 160   \code{\var{s}[\var{start}:\var{end}]}.  Return \code{-1} on failure.
 161   Defaults for \var{start} and \var{end} and interpretation of
 162   negative values are the same as for slices.
 163 \end{funcdesc}
 164
 165 \begin{funcdesc}{rfind}{s, sub\optional{, start\optional{, end}}}
 166   Like \function{find()} but find the highest index.
 167 \end{funcdesc}
 168
 169 \begin{funcdesc}{index}{s, sub\optional{, start\optional{, end}}}
 170   Like \function{find()} but raise \exception{ValueError} when the
 171   substring is not found.
 172 \end{funcdesc}
 173
 174 \begin{funcdesc}{rindex}{s, sub\optional{, start\optional{, end}}}
 175   Like \function{rfind()} but raise \exception{ValueError} when the
 176   substring is not found.
 177 \end{funcdesc}
 178
 179 \begin{funcdesc}{count}{s, sub\optional{, start\optional{, end}}}
 180   Return the number of (non-overlapping) occurrences of substring
 181   \var{sub} in string \code{\var{s}[\var{start}:\var{end}]}.
 182   Defaults for \var{start} and \var{end} and interpretation of
 183   negative values are the same as for slices.
 184 \end{funcdesc}
 185
 186 \begin{funcdesc}{lower}{s}
 187   Return a copy of \var{s}, but with upper case letters converted to
 188   lower case.
 189 \end{funcdesc}
 190
 191 \begin{funcdesc}{maketrans}{from, to}
 192   Return a translation table suitable for passing to
 193   \function{translate()} or \function{regex.compile()}, that will map
 194   each character in \var{from} into the character at the same position
 195   in \var{to}; \var{from} and \var{to} must have the same length.
 196
 197   \warning{Don't use strings derived from \constant{lowercase}
 198   and \constant{uppercase} as arguments; in some locales, these don't have
 199   the same length.  For case conversions, always use
 200   \function{lower()} and \function{upper()}.}
 201 \end{funcdesc}
 202
 203 \begin{funcdesc}{split}{s\optional{, sep\optional{, maxsplit}}}
 204   Return a list of the words of the string \var{s}.  If the optional
 205   second argument \var{sep} is absent or \code{None}, the words are
 206   separated by arbitrary strings of whitespace characters (space, tab,
 207   newline, return, formfeed).  If the second argument \var{sep} is
 208   present and not \code{None}, it specifies a string to be used as the
 209   word separator.  The returned list will then have one more item
 210   than the number of non-overlapping occurrences of the separator in
 211   the string.  The optional third argument \var{maxsplit} defaults to
 212   0.  If it is nonzero, at most \var{maxsplit} splits occur,
 213   and the remainder of the string is returned as the final element of
 214   the list (thus, the list will have at most \code{\var{maxsplit}+1}
 215   elements).
 216 \end{funcdesc}
 217
 218 \begin{funcdesc}{splitfields}{s\optional{, sep\optional{, maxsplit}}}
 219   This function behaves identically to \function{split()}.  (In the
 220   past, \function{split()} was only used with one argument, while
 221   \function{splitfields()} was only used with two arguments.)
 222 \end{funcdesc}
 223
 224 \begin{funcdesc}{join}{words\optional{, sep}}
 225   Concatenate a list or tuple of words with intervening occurrences of
 226   \var{sep}.  The default value for \var{sep} is a single space
 227   character.  It is always true that
 228   \samp{string.join(string.split(\var{s}, \var{sep}), \var{sep})}
 229   equals \var{s}.
 230 \end{funcdesc}
 231
 232 \begin{funcdesc}{joinfields}{words\optional{, sep}}
 233   This function behaves identically to \function{join()}.  (In the past,
 234   \function{join()} was only used with one argument, while
 235   \function{joinfields()} was only used with two arguments.)
 236   Note that there is no \method{joinfields()} method on string
 237   objects; use the \method{join()} method instead.
 238 \end{funcdesc}
 239
 240 \begin{funcdesc}{lstrip}{s\optional{, chars}}
 241 Return a copy of the string with leading characters removed.  If
 242 \var{chars} is omitted or \code{None}, whitespace characters are
 243 removed.  If given and not \code{None}, \var{chars} must be a string;
 244 the characters in the string will be stripped from the beginning of
 245 the string \var{s}.
 246 \versionchanged[The \var{chars} parameter was added.  The \var{chars}
 247 parameter cannot be passed in earlier 2.2 versions]{2.2.3}
 248 \end{funcdesc}
 249
 250 \begin{funcdesc}{rstrip}{s\optional{, chars}}
 251 Return a copy of the string with trailing characters removed.  If
 252 \var{chars} is omitted or \code{None}, whitespace characters are
 253 removed.  If given and not \code{None}, \var{chars} must be a string;
 254 the characters in the string will be stripped from the end of the
 255 string \var{s}.
 256 \versionchanged[The \var{chars} parameter was added.  The \var{chars}
 257 parameter cannot be passed in earlier 2.2 versions]{2.2.3}
 258 \end{funcdesc}
 259
 260 \begin{funcdesc}{strip}{s\optional{, chars}}
 261 Return a copy of the string with leading and trailing characters
 262 removed.  If \var{chars} is omitted or \code{None}, whitespace
 263 characters are removed.  If given and not \code{None}, \var{chars}
 264 must be a string; the characters in the string will be stripped from
 265 both ends of the string \var{s}.
 266 \versionchanged[The \var{chars} parameter was added.  The \var{chars}
 267 parameter cannot be passed in earlier 2.2 versions]{2.2.3}
 268 \end{funcdesc}
 269
 270 \begin{funcdesc}{swapcase}{s}
 271   Return a copy of \var{s}, but with lower case letters
 272   converted to upper case and vice versa.
 273 \end{funcdesc}
 274
 275 \begin{funcdesc}{translate}{s, table\optional{, deletechars}}
 276   Delete all characters from \var{s} that are in \var{deletechars} (if
 277   present), and then translate the characters using \var{table}, which
 278   must be a 256-character string giving the translation for each
 279   character value, indexed by its ordinal.
 280 \end{funcdesc}
 281
 282 \begin{funcdesc}{upper}{s}
 283   Return a copy of \var{s}, but with lower case letters converted to
 284   upper case.
 285 \end{funcdesc}
 286
 287 \begin{funcdesc}{ljust}{s, width}
 288 \funcline{rjust}{s, width}
 289 \funcline{center}{s, width}
 290   These functions respectively left-justify, right-justify and center
 291   a string in a field of given width.  They return a string that is at
 292   least \var{width} characters wide, created by padding the string
 293   \var{s} with spaces until the given width on the right, left or both
 294   sides.  The string is never truncated.
 295 \end{funcdesc}
 296
 297 \begin{funcdesc}{zfill}{s, width}
 298   Pad a numeric string on the left with zero digits until the given
 299   width is reached.  Strings starting with a sign are handled
 300   correctly.
 301 \end{funcdesc}
 302
 303 \begin{funcdesc}{replace}{str, old, new\optional{, maxreplace}}
 304   Return a copy of string \var{str} with all occurrences of substring
 305   \var{old} replaced by \var{new}.  If the optional argument
 306   \var{maxreplace} is given, the first \var{maxreplace} occurrences are
 307   replaced.
 308 \end{funcdesc}