Lib/string.py

   1 """A collection of string constants.
   2
   3 Public module variables:
   4
   5 whitespace -- a string containing all characters considered whitespace
   6 lowercase -- a string containing all characters considered lowercase letters
   7 uppercase -- a string containing all characters considered uppercase letters
   8 letters -- a string containing all characters considered letters
   9 digits -- a string containing all characters considered decimal digits
  10 hexdigits -- a string containing all characters considered hexadecimal digits
  11 octdigits -- a string containing all characters considered octal digits
  12 punctuation -- a string containing all characters considered punctuation
  13 printable -- a string containing all characters considered printable
  14
  15 """
  16
  17 # Some strings for ctype-style character classification
  18 whitespace = ' \t\n\r\v\f'
  19 ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
  20 ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  21 ascii_letters = ascii_lowercase + ascii_uppercase
  22 digits = '0123456789'
  23 hexdigits = digits + 'abcdef' + 'ABCDEF'
  24 octdigits = '01234567'
  25 punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
  26 printable = digits + ascii_letters + punctuation + whitespace
  27
  28 # Functions which aren't available as string methods.
  29
  30 # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
  31 def capwords(s, sep=None):
  32     """capwords(s, [sep]) -> string
  33
  34     Split the argument into words using split, capitalize each
  35     word using capitalize, and join the capitalized words using
  36     join. Note that this replaces runs of whitespace characters by
  37     a single space.
  38
  39     """
  40     return (sep or ' ').join([x.capitalize() for x in s.split(sep)])
  41
  42
  43 # Construct a translation map for bytes.translate
  44 def maketrans(frm: bytes, to: bytes) -> bytes:
  45     """maketrans(frm, to) -> bytes
  46
  47     Return a translation table (a bytes object of length 256)
  48     suitable for use in bytes.translate where each byte in frm is
  49     mapped to the byte at the same position in to.
  50     The strings frm and to must be of the same length.
  51     """
  52     if len(frm) != len(to):
  53         raise ValueError("maketrans arguments must have same length")
  54     if not (isinstance(frm, bytes) and isinstance(to, bytes)):
  55         raise TypeError("maketrans arguments must be bytes objects")
  56     L = bytearray(range(256))
  57     for i, c in enumerate(frm):
  58         L[c] = to[i]
  59     return bytes(L)
  60
  61
  62 ####################################################################
  63 import re as _re
  64
  65 class _multimap:
  66     """Helper class for combining multiple mappings.
  67
  68     Used by .{safe_,}substitute() to combine the mapping and keyword
  69     arguments.
  70     """
  71     def __init__(self, primary, secondary):
  72         self._primary = primary
  73         self._secondary = secondary
  74
  75     def __getitem__(self, key):
  76         try:
  77             return self._primary[key]
  78         except KeyError:
  79             return self._secondary[key]
  80
  81
  82 class _TemplateMetaclass(type):
  83     pattern = r"""
  84     %(delim)s(?:
  85       (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
  86       (?P<named>%(id)s)      |   # delimiter and a Python identifier
  87       {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
  88       (?P<invalid>)              # Other ill-formed delimiter exprs
  89     )
  90     """
  91
  92     def __init__(cls, name, bases, dct):
  93         super(_TemplateMetaclass, cls).__init__(name, bases, dct)
  94         if 'pattern' in dct:
  95             pattern = cls.pattern
  96         else:
  97             pattern = _TemplateMetaclass.pattern % {
  98                 'delim' : _re.escape(cls.delimiter),
  99                 'id'    : cls.idpattern,
 100                 }
 101         cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
 102
 103
 104 class Template(metaclass=_TemplateMetaclass):
 105     """A string class for supporting $-substitutions."""
 106
 107     delimiter = '$'
 108     idpattern = r'[_a-z][_a-z0-9]*'
 109
 110     def __init__(self, template):
 111         self.template = template
 112
 113     # Search for $$, $identifier, ${identifier}, and any bare $'s
 114
 115     def _invalid(self, mo):
 116         i = mo.start('invalid')
 117         lines = self.template[:i].splitlines(True)
 118         if not lines:
 119             colno = 1
 120             lineno = 1
 121         else:
 122             colno = i - len(''.join(lines[:-1]))
 123             lineno = len(lines)
 124         raise ValueError('Invalid placeholder in string: line %d, col %d' %
 125                          (lineno, colno))
 126
 127     def substitute(self, *args, **kws):
 128         if len(args) > 1:
 129             raise TypeError('Too many positional arguments')
 130         if not args:
 131             mapping = kws
 132         elif kws:
 133             mapping = _multimap(kws, args[0])
 134         else:
 135             mapping = args[0]
 136         # Helper function for .sub()
 137         def convert(mo):
 138             # Check the most common path first.
 139             named = mo.group('named') or mo.group('braced')
 140             if named is not None:
 141                 val = mapping[named]
 142                 # We use this idiom instead of str() because the latter will
 143                 # fail if val is a Unicode containing non-ASCII characters.
 144                 return '%s' % (val,)
 145             if mo.group('escaped') is not None:
 146                 return self.delimiter
 147             if mo.group('invalid') is not None:
 148                 self._invalid(mo)
 149             raise ValueError('Unrecognized named group in pattern',
 150                              self.pattern)
 151         return self.pattern.sub(convert, self.template)
 152
 153     def safe_substitute(self, *args, **kws):
 154         if len(args) > 1:
 155             raise TypeError('Too many positional arguments')
 156         if not args:
 157             mapping = kws
 158         elif kws:
 159             mapping = _multimap(kws, args[0])
 160         else:
 161             mapping = args[0]
 162         # Helper function for .sub()
 163         def convert(mo):
 164             named = mo.group('named')
 165             if named is not None:
 166                 try:
 167                     # We use this idiom instead of str() because the latter
 168                     # will fail if val is a Unicode containing non-ASCII
 169                     return '%s' % (mapping[named],)
 170                 except KeyError:
 171                     return self.delimiter + named
 172             braced = mo.group('braced')
 173             if braced is not None:
 174                 try:
 175                     return '%s' % (mapping[braced],)
 176                 except KeyError:
 177                     return self.delimiter + '{' + braced + '}'
 178             if mo.group('escaped') is not None:
 179                 return self.delimiter
 180             if mo.group('invalid') is not None:
 181                 return self.delimiter
 182             raise ValueError('Unrecognized named group in pattern',
 183                              self.pattern)
 184         return self.pattern.sub(convert, self.template)
 185
 186
 187
 188 ########################################################################
 189 # the Formatter class
 190 # see PEP 3101 for details and purpose of this class
 191
 192 # The hard parts are reused from the C implementation.  They're
 193 # exposed here via the sys module.  sys was chosen because it's always
 194 # available and doesn't have to be dynamically loaded.
 195
 196 # The overall parser is implemented in str._formatter_parser.
 197 # The field name parser is implemented in str._formatter_field_name_split
 198
 199 class Formatter:
 200     def format(self, format_string, *args, **kwargs):
 201         return self.vformat(format_string, args, kwargs)
 202
 203     def vformat(self, format_string, args, kwargs):
 204         used_args = set()
 205         result = self._vformat(format_string, args, kwargs, used_args, 2)
 206         self.check_unused_args(used_args, args, kwargs)
 207         return result
 208
 209     def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
 210         if recursion_depth < 0:
 211             raise ValueError('Max string recursion exceeded')
 212         result = []
 213         for literal_text, field_name, format_spec, conversion in \
 214                 self.parse(format_string):
 215
 216             # output the literal text
 217             if literal_text:
 218                 result.append(literal_text)
 219
 220             # if there's a field, output it
 221             if field_name is not None:
 222                 # this is some markup, find the object and do
 223                 #  the formatting
 224
 225                 # given the field_name, find the object it references
 226                 #  and the argument it came from
 227                 obj, arg_used = self.get_field(field_name, args, kwargs)
 228                 used_args.add(arg_used)
 229
 230                 # do any conversion on the resulting object
 231                 obj = self.convert_field(obj, conversion)
 232
 233                 # expand the format spec, if needed
 234                 format_spec = self._vformat(format_spec, args, kwargs,
 235                                             used_args, recursion_depth-1)
 236
 237                 # format the object and append to the result
 238                 result.append(self.format_field(obj, format_spec))
 239
 240         return ''.join(result)
 241
 242
 243     def get_value(self, key, args, kwargs):
 244         if isinstance(key, int):
 245             return args[key]
 246         else:
 247             return kwargs[key]
 248
 249
 250     def check_unused_args(self, used_args, args, kwargs):
 251         pass
 252
 253
 254     def format_field(self, value, format_spec):
 255         return format(value, format_spec)
 256
 257
 258     def convert_field(self, value, conversion):
 259         # do any conversion on the resulting object
 260         if conversion == 'r':
 261             return repr(value)
 262         elif conversion == 's':
 263             return str(value)
 264         elif conversion is None:
 265             return value
 266         raise ValueError("Unknown converion specifier {0!s}".format(conversion))
 267
 268
 269     # returns an iterable that contains tuples of the form:
 270     # (literal_text, field_name, format_spec, conversion)
 271     # literal_text can be zero length
 272     # field_name can be None, in which case there's no
 273     #  object to format and output
 274     # if field_name is not None, it is looked up, formatted
 275     #  with format_spec and conversion and then used
 276     def parse(self, format_string):
 277         return format_string._formatter_parser()
 278
 279
 280     # given a field_name, find the object it references.
 281     #  field_name:   the field being looked up, e.g. "0.name"
 282     #                 or "lookup[3]"
 283     #  used_args:    a set of which args have been used
 284     #  args, kwargs: as passed in to vformat
 285     def get_field(self, field_name, args, kwargs):
 286         first, rest = field_name._formatter_field_name_split()
 287
 288         obj = self.get_value(first, args, kwargs)
 289
 290         # loop through the rest of the field_name, doing
 291         #  getattr or getitem as needed
 292         for is_attr, i in rest:
 293             if is_attr:
 294                 obj = getattr(obj, i)
 295             else:
 296                 obj = obj[i]
 297
 298         return obj, first