Doc/lib/libstring.tex

   1 \section{\module{string} ---
   2          Common string operations}
   3
   4 \declaremodule{standard}{string}
   5 \modulesynopsis{Common string operations.}
   6
   7
   8 This module defines some constants useful for checking character
   9 classes and some useful string functions.  See the module
  10 \refmodule{re}\refstmodindex{re} for string functions based on regular
  11 expressions.
  12
  13 The constants defined in this module are:
  14
  15 \begin{datadesc}{digits}
  16   The string \code{'0123456789'}.
  17 \end{datadesc}
  18
  19 \begin{datadesc}{hexdigits}
  20   The string \code{'0123456789abcdefABCDEF'}.
  21 \end{datadesc}
  22
  23 \begin{datadesc}{letters}
  24   The concatenation of the strings \code{lowercase} and
  25   \code{uppercase} described below.
  26 \end{datadesc}
  27
  28 \begin{datadesc}{lowercase}
  29   A string containing all the characters that are considered lowercase
  30   letters.  On most systems this is the string
  31   \code{'abcdefghijklmnopqrstuvwxyz'}.  Do not change its definition ---
  32   the effect on the routines \function{upper()} and
  33   \function{swapcase()} is undefined.
  34 \end{datadesc}
  35
  36 \begin{datadesc}{octdigits}
  37   The string \code{'01234567'}.
  38 \end{datadesc}
  39
  40 \begin{datadesc}{uppercase}
  41   A string containing all the characters that are considered uppercase
  42   letters.  On most systems this is the string
  43   \code{'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}.  Do not change its definition ---
  44   the effect on the routines \function{lower()} and
  45   \function{swapcase()} is undefined.
  46 \end{datadesc}
  47
  48 \begin{datadesc}{whitespace}
  49   A string containing all characters that are considered whitespace.
  50   On most systems this includes the characters space, tab, linefeed,
  51   return, formfeed, and vertical tab.  Do not change its definition ---
  52   the effect on the routines \function{strip()} and \function{split()}
  53   is undefined.
  54 \end{datadesc}
  55
  56 The functions defined in this module are:
  57
  58
  59 \begin{funcdesc}{atof}{s}
  60   Convert a string to a floating point number.  The string must have
  61   the standard syntax for a floating point literal in Python,
  62   optionally preceded by a sign (\samp{+} or \samp{-}).  Note that
  63   this behaves identically to the built-in function
  64   \function{float()}\bifuncindex{float} when passed a string.
  65
  66   \strong{Note:} When passing in a string, values for NaN\index{NaN}
  67   and Infinity\index{Infinity} may be returned, depending on the
  68   underlying C library.  The specific set of strings accepted which
  69   cause these values to be returned depends entirely on the C library
  70   and is known to vary.
  71 \end{funcdesc}
  72
  73 \begin{funcdesc}{atoi}{s\optional{, base}}
  74   Convert string \var{s} to an integer in the given \var{base}.  The
  75   string must consist of one or more digits, optionally preceded by a
  76   sign (\samp{+} or \samp{-}).  The \var{base} defaults to 10.  If it
  77   is 0, a default base is chosen depending on the leading characters
  78   of the string (after stripping the sign): \samp{0x} or \samp{0X}
  79   means 16, \samp{0} means 8, anything else means 10.  If \var{base}
  80   is 16, a leading \samp{0x} or \samp{0X} is always accepted.  Note
  81   that when invoked without \var{base} or with \var{base} set to 10,
  82   this behaves identically to the built-in function \function{int()}
  83   when passed a string.  (Also note: for a more flexible
  84   interpretation of numeric literals, use the built-in function
  85   \function{eval()}\bifuncindex{eval}.)
  86 \end{funcdesc}
  87
  88 \begin{funcdesc}{atol}{s\optional{, base}}
  89   Convert string \var{s} to a long integer in the given \var{base}.
  90   The string must consist of one or more digits, optionally preceded
  91   by a sign (\samp{+} or \samp{-}).  The \var{base} argument has the
  92   same meaning as for \function{atoi()}.  A trailing \samp{l} or
  93   \samp{L} is not allowed, except if the base is 0.  Note that when
  94   invoked without \var{base} or with \var{base} set to 10, this
  95   behaves identically to the built-in function
  96   \function{long()}\bifuncindex{long} when passed a string.
  97 \end{funcdesc}
  98
  99 \begin{funcdesc}{capitalize}{word}
 100   Capitalize the first character of the argument.
 101 \end{funcdesc}
 102
 103 \begin{funcdesc}{capwords}{s}
 104   Split the argument into words using \function{split()}, capitalize
 105   each word using \function{capitalize()}, and join the capitalized
 106   words using \function{join()}.  Note that this replaces runs of
 107   whitespace characters by a single space, and removes leading and
 108   trailing whitespace.
 109 \end{funcdesc}
 110
 111 \begin{funcdesc}{expandtabs}{s\optional{, tabsize}}
 112   Expand tabs in a string, i.e.\ replace them by one or more spaces,
 113   depending on the current column and the given tab size.  The column
 114   number is reset to zero after each newline occurring in the string.
 115   This doesn't understand other non-printing characters or escape
 116   sequences.  The tab size defaults to 8.
 117 \end{funcdesc}
 118
 119 \begin{funcdesc}{find}{s, sub\optional{, start\optional{, end}}}
 120   Return the lowest index in \var{s} where the substring \var{sub} is
 121   found such that \var{sub} is wholly contained in
 122   \code{\var{s}[\var{start}:\var{end}]}.  Return \code{-1} on failure.
 123   Defaults for \var{start} and \var{end} and interpretation of
 124   negative values are the same as for slices.
 125 \end{funcdesc}
 126
 127 \begin{funcdesc}{rfind}{s, sub\optional{, start\optional{, end}}}
 128   Like \function{find()} but find the highest index.
 129 \end{funcdesc}
 130
 131 \begin{funcdesc}{index}{s, sub\optional{, start\optional{, end}}}
 132   Like \function{find()} but raise \exception{ValueError} when the
 133   substring is not found.
 134 \end{funcdesc}
 135
 136 \begin{funcdesc}{rindex}{s, sub\optional{, start\optional{, end}}}
 137   Like \function{rfind()} but raise \exception{ValueError} when the
 138   substring is not found.
 139 \end{funcdesc}
 140
 141 \begin{funcdesc}{count}{s, sub\optional{, start\optional{, end}}}
 142   Return the number of (non-overlapping) occurrences of substring
 143   \var{sub} in string \code{\var{s}[\var{start}:\var{end}]}.
 144   Defaults for \var{start} and \var{end} and interpretation of
 145   negative values are the same as for slices.
 146 \end{funcdesc}
 147
 148 \begin{funcdesc}{lower}{s}
 149   Return a copy of \var{s}, but with upper case letters converted to
 150   lower case.
 151 \end{funcdesc}
 152
 153 \begin{funcdesc}{maketrans}{from, to}
 154   Return a translation table suitable for passing to
 155   \function{translate()} or \function{regex.compile()}, that will map
 156   each character in \var{from} into the character at the same position
 157   in \var{to}; \var{from} and \var{to} must have the same length.
 158
 159   \strong{Warning:} don't use strings derived from \code{lowercase}
 160   and \code{uppercase} as arguments; in some locales, these don't have
 161   the same length.  For case conversions, always use
 162   \function{lower()} and \function{upper()}.
 163 \end{funcdesc}
 164
 165 \begin{funcdesc}{split}{s\optional{, sep\optional{, maxsplit}}}
 166   Return a list of the words of the string \var{s}.  If the optional
 167   second argument \var{sep} is absent or \code{None}, the words are
 168   separated by arbitrary strings of whitespace characters (space, tab,
 169   newline, return, formfeed).  If the second argument \var{sep} is
 170   present and not \code{None}, it specifies a string to be used as the
 171   word separator.  The returned list will then have one more item
 172   than the number of non-overlapping occurrences of the separator in
 173   the string.  The optional third argument \var{maxsplit} defaults to
 174   0.  If it is nonzero, at most \var{maxsplit} number of splits occur,
 175   and the remainder of the string is returned as the final element of
 176   the list (thus, the list will have at most \code{\var{maxsplit}+1}
 177   elements).
 178 \end{funcdesc}
 179
 180 \begin{funcdesc}{splitfields}{s\optional{, sep\optional{, maxsplit}}}
 181   This function behaves identically to \function{split()}.  (In the
 182   past, \function{split()} was only used with one argument, while
 183   \function{splitfields()} was only used with two arguments.)
 184 \end{funcdesc}
 185
 186 \begin{funcdesc}{join}{words\optional{, sep}}
 187   Concatenate a list or tuple of words with intervening occurrences of
 188   \var{sep}.  The default value for \var{sep} is a single space
 189   character.  It is always true that
 190   \samp{string.join(string.split(\var{s}, \var{sep}), \var{sep})}
 191   equals \var{s}.
 192 \end{funcdesc}
 193
 194 \begin{funcdesc}{joinfields}{words\optional{, sep}}
 195   This function behaves identically to \function{join()}.  (In the past,
 196   \function{join()} was only used with one argument, while
 197   \function{joinfields()} was only used with two arguments.)
 198 \end{funcdesc}
 199
 200 \begin{funcdesc}{lstrip}{s}
 201   Return a copy of \var{s} but without leading whitespace characters.
 202 \end{funcdesc}
 203
 204 \begin{funcdesc}{rstrip}{s}
 205   Return a copy of \var{s} but without trailing whitespace
 206   characters.
 207 \end{funcdesc}
 208
 209 \begin{funcdesc}{strip}{s}
 210   Return a copy of \var{s} without leading or trailing whitespace.
 211 \end{funcdesc}
 212
 213 \begin{funcdesc}{swapcase}{s}
 214   Return a copy of \var{s}, but with lower case letters
 215   converted to upper case and vice versa.
 216 \end{funcdesc}
 217
 218 \begin{funcdesc}{translate}{s, table\optional{, deletechars}}
 219   Delete all characters from \var{s} that are in \var{deletechars} (if
 220   present), and then translate the characters using \var{table}, which
 221   must be a 256-character string giving the translation for each
 222   character value, indexed by its ordinal.
 223 \end{funcdesc}
 224
 225 \begin{funcdesc}{upper}{s}
 226   Return a copy of \var{s}, but with lower case letters converted to
 227   upper case.
 228 \end{funcdesc}
 229
 230 \begin{funcdesc}{ljust}{s, width}
 231 \funcline{rjust}{s, width}
 232 \funcline{center}{s, width}
 233   These functions respectively left-justify, right-justify and center
 234   a string in a field of given width.  They return a string that is at
 235   least \var{width} characters wide, created by padding the string
 236   \var{s} with spaces until the given width on the right, left or both
 237   sides.  The string is never truncated.
 238 \end{funcdesc}
 239
 240 \begin{funcdesc}{zfill}{s, width}
 241   Pad a numeric string on the left with zero digits until the given
 242   width is reached.  Strings starting with a sign are handled
 243   correctly.
 244 \end{funcdesc}
 245
 246 \begin{funcdesc}{replace}{str, old, new\optional{, maxsplit}}
 247   Return a copy of string \var{str} with all occurrences of substring
 248   \var{old} replaced by \var{new}.  If the optional argument
 249   \var{maxsplit} is given, the first \var{maxsplit} occurrences are
 250   replaced.
 251 \end{funcdesc}
 252
 253 This module is implemented in Python.  Much of its functionality has
 254 been reimplemented in the built-in module
 255 \module{strop}\refbimodindex{strop}.  However, you
 256 should \emph{never} import the latter module directly.  When
 257 \module{string} discovers that \module{strop} exists, it transparently
 258 replaces parts of itself with the implementation from \module{strop}.
 259 After initialization, there is \emph{no} overhead in using
 260 \module{string} instead of \module{strop}.