Doc/lib/libstring.tex

   1 \section{\module{string} ---
   2          Common string operations}
   3
   4 \declaremodule{standard}{string}
   5 \modulesynopsis{Common string operations.}
   6
   7
   8 This module defines some constants useful for checking character
   9 classes and some useful string functions.  See the module
  10 \refmodule{re}\refstmodindex{re} for string functions based on regular
  11 expressions.
  12
  13 The constants defined in this module are:
  14
  15 \begin{datadesc}{digits}
  16   The string \code{'0123456789'}.
  17 \end{datadesc}
  18
  19 \begin{datadesc}{hexdigits}
  20   The string \code{'0123456789abcdefABCDEF'}.
  21 \end{datadesc}
  22
  23 \begin{datadesc}{letters}
  24   The concatenation of the strings \constant{lowercase} and
  25   \constant{uppercase} described below.
  26 \end{datadesc}
  27
  28 \begin{datadesc}{lowercase}
  29   A string containing all the characters that are considered lowercase
  30   letters.  On most systems this is the string
  31   \code{'abcdefghijklmnopqrstuvwxyz'}.  Do not change its definition ---
  32   the effect on the routines \function{upper()} and
  33   \function{swapcase()} is undefined.
  34 \end{datadesc}
  35
  36 \begin{datadesc}{octdigits}
  37   The string \code{'01234567'}.
  38 \end{datadesc}
  39
  40 \begin{datadesc}{punctuation}
  41   String of \ASCII{} characters which are considered punctuation
  42   characters in the \samp{C} locale.
  43 \end{datadesc}
  44
  45 \begin{datadesc}{printable}
  46   String of characters which are considered printable.  This is a
  47   combination of \constant{digits}, \constant{letters},
  48   \constant{punctuation}, and \constant{whitespace}.
  49 \end{datadesc}
  50
  51 \begin{datadesc}{uppercase}
  52   A string containing all the characters that are considered uppercase
  53   letters.  On most systems this is the string
  54   \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}.  Do not change its definition ---
  55   the effect on the routines \function{lower()} and
  56   \function{swapcase()} is undefined.
  57 \end{datadesc}
  58
  59 \begin{datadesc}{whitespace}
  60   A string containing all characters that are considered whitespace.
  61   On most systems this includes the characters space, tab, linefeed,
  62   return, formfeed, and vertical tab.  Do not change its definition ---
  63   the effect on the routines \function{strip()} and \function{split()}
  64   is undefined.
  65 \end{datadesc}
  66
  67
  68 Many of the functions provided by this module are also defined as
  69 methods of string and Unicode objects; see ``String Methods''
  70 (section~\ref{string-methods}) for more information on those.
  71 The functions defined in this module are:
  72
  73 \begin{funcdesc}{atof}{s}
  74   \deprecated{2.0}{Use the \function{float()} built-in function.}
  75   Convert a string to a floating point number.  The string must have
  76   the standard syntax for a floating point literal in Python,
  77   optionally preceded by a sign (\samp{+} or \samp{-}).  Note that
  78   this behaves identically to the built-in function
  79   \function{float()}\bifuncindex{float} when passed a string.
  80
  81   \strong{Note:} When passing in a string, values for NaN\index{NaN}
  82   and Infinity\index{Infinity} may be returned, depending on the
  83   underlying C library.  The specific set of strings accepted which
  84   cause these values to be returned depends entirely on the C library
  85   and is known to vary.
  86 \end{funcdesc}
  87
  88 \begin{funcdesc}{atoi}{s\optional{, base}}
  89   \deprecated{2.0}{Use the \function{int()} built-in function.}
  90   Convert string \var{s} to an integer in the given \var{base}.  The
  91   string must consist of one or more digits, optionally preceded by a
  92   sign (\samp{+} or \samp{-}).  The \var{base} defaults to 10.  If it
  93   is 0, a default base is chosen depending on the leading characters
  94   of the string (after stripping the sign): \samp{0x} or \samp{0X}
  95   means 16, \samp{0} means 8, anything else means 10.  If \var{base}
  96   is 16, a leading \samp{0x} or \samp{0X} is always accepted, though
  97   not required.  This behaves identically to the built-in function
  98   \function{int()} when passed a string.  (Also note: for a more
  99   flexible interpretation of numeric literals, use the built-in
 100   function \function{eval()}\bifuncindex{eval}.)
 101 \end{funcdesc}
 102
 103 \begin{funcdesc}{atol}{s\optional{, base}}
 104   \deprecated{2.0}{Use the \function{long()} built-in function.}
 105   Convert string \var{s} to a long integer in the given \var{base}.
 106   The string must consist of one or more digits, optionally preceded
 107   by a sign (\samp{+} or \samp{-}).  The \var{base} argument has the
 108   same meaning as for \function{atoi()}.  A trailing \samp{l} or
 109   \samp{L} is not allowed, except if the base is 0.  Note that when
 110   invoked without \var{base} or with \var{base} set to 10, this
 111   behaves identically to the built-in function
 112   \function{long()}\bifuncindex{long} when passed a string.
 113 \end{funcdesc}
 114
 115 \begin{funcdesc}{capitalize}{word}
 116   Capitalize the first character of the argument.
 117 \end{funcdesc}
 118
 119 \begin{funcdesc}{capwords}{s}
 120   Split the argument into words using \function{split()}, capitalize
 121   each word using \function{capitalize()}, and join the capitalized
 122   words using \function{join()}.  Note that this replaces runs of
 123   whitespace characters by a single space, and removes leading and
 124   trailing whitespace.
 125 \end{funcdesc}
 126
 127 \begin{funcdesc}{expandtabs}{s\optional{, tabsize}}
 128   Expand tabs in a string, i.e.\ replace them by one or more spaces,
 129   depending on the current column and the given tab size.  The column
 130   number is reset to zero after each newline occurring in the string.
 131   This doesn't understand other non-printing characters or escape
 132   sequences.  The tab size defaults to 8.
 133 \end{funcdesc}
 134
 135 \begin{funcdesc}{find}{s, sub\optional{, start\optional{, end}}}
 136   Return the lowest index in \var{s} where the substring \var{sub} is
 137   found such that \var{sub} is wholly contained in
 138   \code{\var{s}[\var{start}:\var{end}]}.  Return \code{-1} on failure.
 139   Defaults for \var{start} and \var{end} and interpretation of
 140   negative values are the same as for slices.
 141 \end{funcdesc}
 142
 143 \begin{funcdesc}{rfind}{s, sub\optional{, start\optional{, end}}}
 144   Like \function{find()} but find the highest index.
 145 \end{funcdesc}
 146
 147 \begin{funcdesc}{index}{s, sub\optional{, start\optional{, end}}}
 148   Like \function{find()} but raise \exception{ValueError} when the
 149   substring is not found.
 150 \end{funcdesc}
 151
 152 \begin{funcdesc}{rindex}{s, sub\optional{, start\optional{, end}}}
 153   Like \function{rfind()} but raise \exception{ValueError} when the
 154   substring is not found.
 155 \end{funcdesc}
 156
 157 \begin{funcdesc}{count}{s, sub\optional{, start\optional{, end}}}
 158   Return the number of (non-overlapping) occurrences of substring
 159   \var{sub} in string \code{\var{s}[\var{start}:\var{end}]}.
 160   Defaults for \var{start} and \var{end} and interpretation of
 161   negative values are the same as for slices.
 162 \end{funcdesc}
 163
 164 \begin{funcdesc}{lower}{s}
 165   Return a copy of \var{s}, but with upper case letters converted to
 166   lower case.
 167 \end{funcdesc}
 168
 169 \begin{funcdesc}{maketrans}{from, to}
 170   Return a translation table suitable for passing to
 171   \function{translate()} or \function{regex.compile()}, that will map
 172   each character in \var{from} into the character at the same position
 173   in \var{to}; \var{from} and \var{to} must have the same length.
 174
 175   \strong{Warning:} don't use strings derived from \constant{lowercase}
 176   and \constant{uppercase} as arguments; in some locales, these don't have
 177   the same length.  For case conversions, always use
 178   \function{lower()} and \function{upper()}.
 179 \end{funcdesc}
 180
 181 \begin{funcdesc}{split}{s\optional{, sep\optional{, maxsplit}}}
 182   Return a list of the words of the string \var{s}.  If the optional
 183   second argument \var{sep} is absent or \code{None}, the words are
 184   separated by arbitrary strings of whitespace characters (space, tab,
 185   newline, return, formfeed).  If the second argument \var{sep} is
 186   present and not \code{None}, it specifies a string to be used as the
 187   word separator.  The returned list will then have one more item
 188   than the number of non-overlapping occurrences of the separator in
 189   the string.  The optional third argument \var{maxsplit} defaults to
 190   0.  If it is nonzero, at most \var{maxsplit} number of splits occur,
 191   and the remainder of the string is returned as the final element of
 192   the list (thus, the list will have at most \code{\var{maxsplit}+1}
 193   elements).
 194 \end{funcdesc}
 195
 196 \begin{funcdesc}{splitfields}{s\optional{, sep\optional{, maxsplit}}}
 197   This function behaves identically to \function{split()}.  (In the
 198   past, \function{split()} was only used with one argument, while
 199   \function{splitfields()} was only used with two arguments.)
 200 \end{funcdesc}
 201
 202 \begin{funcdesc}{join}{words\optional{, sep}}
 203   Concatenate a list or tuple of words with intervening occurrences of
 204   \var{sep}.  The default value for \var{sep} is a single space
 205   character.  It is always true that
 206   \samp{string.join(string.split(\var{s}, \var{sep}), \var{sep})}
 207   equals \var{s}.
 208 \end{funcdesc}
 209
 210 \begin{funcdesc}{joinfields}{words\optional{, sep}}
 211   This function behaves identically to \function{join()}.  (In the past,
 212   \function{join()} was only used with one argument, while
 213   \function{joinfields()} was only used with two arguments.)
 214 \end{funcdesc}
 215
 216 \begin{funcdesc}{lstrip}{s}
 217   Return a copy of \var{s} but without leading whitespace characters.
 218 \end{funcdesc}
 219
 220 \begin{funcdesc}{rstrip}{s}
 221   Return a copy of \var{s} but without trailing whitespace
 222   characters.
 223 \end{funcdesc}
 224
 225 \begin{funcdesc}{strip}{s}
 226   Return a copy of \var{s} without leading or trailing whitespace.
 227 \end{funcdesc}
 228
 229 \begin{funcdesc}{swapcase}{s}
 230   Return a copy of \var{s}, but with lower case letters
 231   converted to upper case and vice versa.
 232 \end{funcdesc}
 233
 234 \begin{funcdesc}{translate}{s, table\optional{, deletechars}}
 235   Delete all characters from \var{s} that are in \var{deletechars} (if
 236   present), and then translate the characters using \var{table}, which
 237   must be a 256-character string giving the translation for each
 238   character value, indexed by its ordinal.
 239 \end{funcdesc}
 240
 241 \begin{funcdesc}{upper}{s}
 242   Return a copy of \var{s}, but with lower case letters converted to
 243   upper case.
 244 \end{funcdesc}
 245
 246 \begin{funcdesc}{ljust}{s, width}
 247 \funcline{rjust}{s, width}
 248 \funcline{center}{s, width}
 249   These functions respectively left-justify, right-justify and center
 250   a string in a field of given width.  They return a string that is at
 251   least \var{width} characters wide, created by padding the string
 252   \var{s} with spaces until the given width on the right, left or both
 253   sides.  The string is never truncated.
 254 \end{funcdesc}
 255
 256 \begin{funcdesc}{zfill}{s, width}
 257   Pad a numeric string on the left with zero digits until the given
 258   width is reached.  Strings starting with a sign are handled
 259   correctly.
 260 \end{funcdesc}
 261
 262 \begin{funcdesc}{replace}{str, old, new\optional{, maxsplit}}
 263   Return a copy of string \var{str} with all occurrences of substring
 264   \var{old} replaced by \var{new}.  If the optional argument
 265   \var{maxsplit} is given, the first \var{maxsplit} occurrences are
 266   replaced.
 267 \end{funcdesc}
 268
 269 This module is implemented in Python.  Much of its functionality has
 270 been reimplemented in the built-in module
 271 \module{strop}\refbimodindex{strop}.  However, you
 272 should \emph{never} import the latter module directly.  When
 273 \module{string} discovers that \module{strop} exists, it transparently
 274 replaces parts of itself with the implementation from \module{strop}.
 275 After initialization, there is \emph{no} overhead in using
 276 \module{string} instead of \module{strop}.