Fix the tag.
[python/dscho.git] / Lib / string.py
blob7f67abd04a790fc48e4742110c8a1621f4ef9604
1 """A collection of string constants.
3 Public module variables:
5 whitespace -- a string containing all characters considered whitespace
6 ascii_lowercase -- a string containing all ASCII characters considered lowercase letters
7 ascii_uppercase -- a string containing all ASCII characters considered uppercase letters
8 ascii_letters -- a string containing all ASCII characters considered letters
9 digits -- a string containing all characters considered decimal digits
10 hexdigits -- a string containing all characters considered hexadecimal digits
11 octdigits -- a string containing all characters considered octal digits
12 punctuation -- a string containing all characters considered punctuation
13 printable -- a string containing all characters considered printable
15 """
# Some strings for ctype-style character classification.
# All of them are plain ASCII; none of them depend on the locale.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the literal contains a backslash followed by ']', which is
# not a valid escape sequence in a normal string literal.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace
28 # Functions which aren't available as string methods.
30 # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with s.split(sep), capitalize every word with
    its capitalize() method, and glue the words back together with
    sep (or with a single space when sep is None or empty).

    With the default sep the split is on runs of whitespace, so each
    run collapses to one space and leading/trailing whitespace is
    dropped.

    """
    glue = sep or ' '
    words = s.split(sep)
    return glue.join(word.capitalize() for word in words)
43 # Construct a translation map for bytes.translate
def maketrans(frm: bytes, to: bytes) -> bytes:
    """maketrans(frm, to) -> bytes

    Build a 256-byte translation table for bytes.translate.  The table
    starts out as the identity mapping; each byte of frm is then mapped
    to the byte found at the same index in to.

    Raises ValueError when frm and to differ in length, and TypeError
    when either of them is not a bytes object.
    """
    if len(frm) != len(to):
        raise ValueError("maketrans arguments must have same length")
    if not isinstance(frm, bytes) or not isinstance(to, bytes):
        raise TypeError("maketrans arguments must be bytes objects")
    table = bytearray(range(256))
    # Later occurrences of a byte in frm win over earlier ones.
    for source, target in zip(frm, to):
        table[source] = target
    return bytes(table)
62 ####################################################################
63 import re as _re
class _multimap:
    """Read-only view that layers one mapping on top of another.

    Lookups consult the primary mapping first and fall back to the
    secondary mapping only when the primary raises KeyError.  Used by
    .{safe_,}substitute() to combine the mapping and keyword arguments.
    """

    def __init__(self, primary, secondary):
        self._primary, self._secondary = primary, secondary

    def __getitem__(self, key):
        try:
            value = self._primary[key]
        except KeyError:
            # Not in the preferred mapping; a KeyError from the fallback
            # propagates to the caller.
            value = self._secondary[key]
        return value
class _TemplateMetaclass(type):
    """Metaclass that installs a compiled placeholder regex on each class.

    When a class body defines its own ``pattern``, that pattern is
    compiled as-is.  Otherwise the default pattern below is filled in
    with the class's ``delimiter`` (regex-escaped) and ``idpattern``.
    Either way ``cls.pattern`` ends up as a regular expression object
    compiled with the IGNORECASE and VERBOSE flags.
    """

    # Default pattern template.  It is written for re.VERBOSE, so the
    # whitespace and the '#' comments inside it are ignored by the regex
    # engine.  The closing parenthesis ends the (?: ... ) alternation.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super().__init__(name, bases, dct)
        if 'pattern' in dct:
            # The class supplies a complete pattern of its own.
            pattern = cls.pattern
        else:
            pattern = _TemplateMetaclass.pattern % {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
        cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    ``delimiter`` and ``idpattern`` are the class attributes the
    metaclass uses to build the compiled ``pattern`` attribute; a
    subclass may override them, or define ``pattern`` directly.
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # The class-level ``pattern`` searches for $$, $identifier,
    # ${identifier}, and any bare $'s.

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of a bad placeholder.

        mo is the match object whose 'invalid' group matched.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column = offset of i past the end of all complete lines.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    @staticmethod
    def _resolve_mapping(args, kws):
        """Combine the optional positional mapping with keyword arguments.

        Keyword arguments take precedence over the positional mapping.
        Raises TypeError when more than one positional argument is given.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            return kws
        if kws:
            return _multimap(kws, args[0])
        return args[0]

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Values are looked up in the optional positional mapping and in
        the keyword arguments, then converted with str().  A missing key
        propagates the mapping's KeyError; an ill-formed placeholder
        raises ValueError.
        """
        mapping = self._resolve_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolved placeholders in place.

        A placeholder whose key is missing, or that is ill-formed, is
        copied to the output exactly as it appears in the template
        instead of raising KeyError or ValueError.
        """
        mapping = self._resolve_mapping(args, kws)

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named')
            if named is None:
                named = mo.group('braced')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    # Echo the matched text itself rather than rebuilding
                    # it from the delimiter, so custom patterns and
                    # case-insensitive delimiters round-trip unchanged.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
188 ########################################################################
189 # the Formatter class
190 # see PEP 3101 for details and purpose of this class
192 # The hard parts are reused from the C implementation. They're
193 # exposed here as private methods of str (named below) rather than
194 # through a separate module.
196 # The overall parser is implemented in str._formatter_parser.
197 # The field name parser is implemented in str._formatter_field_name_split
class Formatter:
    """Python-level driver for PEP 3101 style string formatting.

    Parsing of the format string and of field names is delegated to the
    private str methods _formatter_parser() and
    _formatter_field_name_split().  Every other step is an ordinary
    method, so a subclass can override it to customize the behavior.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string using the given positional/keyword args."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format format_string with args (a sequence) and kwargs (a mapping).

        Calls check_unused_args() with the set of argument keys that
        were actually referenced before returning the result.
        """
        used_args = set()
        # 2 is the starting recursion budget for nested format specs.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args,
                 recursion_depth):
        """Do the formatting work; recurses into each field's format spec.

        Raises ValueError once recursion_depth drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Return args[key] for an int key, kwargs[key] otherwise."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Format a single value with the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a conversion to value.

        'r' gives repr(value), 's' gives str(value) and None returns
        value unchanged.  Anything else raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion == 'r':
            return repr(value)
        elif conversion == 's':
            return str(value)
        elif conversion is None:
            return value
        raise ValueError(
            "Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split format_string into (literal, field, spec, conversion)."""
        return format_string._formatter_parser()

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name to an object.

        Returns a tuple (obj, first) where first is the leading key of
        field_name, i.e. the key that was passed to get_value().
        """
        first, rest = field_name._formatter_field_name_split()

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first