Make the encoding of the unicode->str conversion explicit; fix [52].
[pyyaml/python3.git] / lib / yaml / constructor.py
bloba1295c86f11b3cb8007b0383408dcab08a6a2be0
2 __all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor',
3 'ConstructorError']
5 from error import *
6 from nodes import *
8 import datetime
10 try:
11 set
12 except NameError:
13 from sets import Set as set
15 import binascii, re, sys, types
class ConstructorError(MarkedYAMLError):
    """Error raised by the constructors in this module when a node cannot
    be converted into a Python object."""
20 class BaseConstructor(object):
22 yaml_constructors = {}
23 yaml_multi_constructors = {}
25 def __init__(self):
26 self.constructed_objects = {}
27 self.recursive_objects = {}
28 self.state_generators = []
29 self.deep_construct = False
31 def check_data(self):
32 # If there are more documents available?
33 return self.check_node()
35 def get_data(self):
36 # Construct and return the next document.
37 if self.check_node():
38 return self.construct_document(self.get_node())
40 def construct_document(self, node):
41 data = self.construct_object(node)
42 while self.state_generators:
43 state_generators = self.state_generators
44 self.state_generators = []
45 for generator in state_generators:
46 for dummy in generator:
47 pass
48 self.constructed_objects = {}
49 self.recursive_objects = {}
50 self.deep_construct = False
51 return data
53 def construct_object(self, node, deep=False):
54 if deep:
55 old_deep = self.deep_construct
56 self.deep_construct = True
57 if node in self.constructed_objects:
58 return self.constructed_objects[node]
59 if node in self.recursive_objects:
60 raise ConstructorError(None, None,
61 "found unconstructable recursive node", node.start_mark)
62 self.recursive_objects[node] = None
63 constructor = None
64 state_constructor = None
65 tag_suffix = None
66 if node.tag in self.yaml_constructors:
67 constructor = self.yaml_constructors[node.tag]
68 else:
69 for tag_prefix in self.yaml_multi_constructors:
70 if node.tag.startswith(tag_prefix):
71 tag_suffix = node.tag[len(tag_prefix):]
72 constructor = self.yaml_multi_constructors[tag_prefix]
73 break
74 else:
75 if None in self.yaml_multi_constructors:
76 tag_suffix = node.tag
77 constructor = self.yaml_multi_constructors[None]
78 elif None in self.yaml_constructors:
79 constructor = self.yaml_constructors[None]
80 elif isinstance(node, ScalarNode):
81 constructor = self.__class__.construct_scalar
82 elif isinstance(node, SequenceNode):
83 constructor = self.__class__.construct_sequence
84 elif isinstance(node, MappingNode):
85 constructor = self.__class__.construct_mapping
86 if tag_suffix is None:
87 data = constructor(self, node)
88 else:
89 data = constructor(self, tag_suffix, node)
90 if isinstance(data, types.GeneratorType):
91 generator = data
92 data = generator.next()
93 if self.deep_construct:
94 for dummy in generator:
95 pass
96 else:
97 self.state_generators.append(generator)
98 self.constructed_objects[node] = data
99 del self.recursive_objects[node]
100 if deep:
101 self.deep_construct = old_deep
102 return data
104 def construct_scalar(self, node):
105 if not isinstance(node, ScalarNode):
106 raise ConstructorError(None, None,
107 "expected a scalar node, but found %s" % node.id,
108 node.start_mark)
109 return node.value
111 def construct_sequence(self, node, deep=False):
112 if not isinstance(node, SequenceNode):
113 raise ConstructorError(None, None,
114 "expected a sequence node, but found %s" % node.id,
115 node.start_mark)
116 return [self.construct_object(child, deep=deep)
117 for child in node.value]
119 def construct_mapping(self, node, deep=False):
120 if not isinstance(node, MappingNode):
121 raise ConstructorError(None, None,
122 "expected a mapping node, but found %s" % node.id,
123 node.start_mark)
124 mapping = {}
125 for key_node, value_node in node.value:
126 key = self.construct_object(key_node, deep=deep)
127 try:
128 hash(key)
129 except TypeError, exc:
130 raise ConstructorError("while constructing a mapping", node.start_mark,
131 "found unacceptable key (%s)" % exc, key_node.start_mark)
132 value = self.construct_object(value_node, deep=deep)
133 mapping[key] = value
134 return mapping
136 def construct_pairs(self, node, deep=False):
137 if not isinstance(node, MappingNode):
138 raise ConstructorError(None, None,
139 "expected a mapping node, but found %s" % node.id,
140 node.start_mark)
141 pairs = []
142 for key_node, value_node in node.value:
143 key = self.construct_object(key_node, deep=deep)
144 value = self.construct_object(value_node, deep=deep)
145 pairs.append((key, value))
146 return pairs
148 def add_constructor(cls, tag, constructor):
149 if not 'yaml_constructors' in cls.__dict__:
150 cls.yaml_constructors = cls.yaml_constructors.copy()
151 cls.yaml_constructors[tag] = constructor
152 add_constructor = classmethod(add_constructor)
154 def add_multi_constructor(cls, tag_prefix, multi_constructor):
155 if not 'yaml_multi_constructors' in cls.__dict__:
156 cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy()
157 cls.yaml_multi_constructors[tag_prefix] = multi_constructor
158 add_multi_constructor = classmethod(add_multi_constructor)
160 class SafeConstructor(BaseConstructor):
162 def construct_scalar(self, node):
163 if isinstance(node, MappingNode):
164 for key_node, value_node in node.value:
165 if key_node.tag == u'tag:yaml.org,2002:value':
166 return self.construct_scalar(value_node)
167 return BaseConstructor.construct_scalar(self, node)
169 def flatten_mapping(self, node):
170 merge = []
171 index = 0
172 while index < len(node.value):
173 key_node, value_node = node.value[index]
174 if key_node.tag == u'tag:yaml.org,2002:merge':
175 del node.value[index]
176 if isinstance(value_node, MappingNode):
177 self.flatten_mapping(value_node)
178 merge.extend(value_node.value)
179 elif isinstance(value_node, SequenceNode):
180 submerge = []
181 for subnode in value_node.value:
182 if not isinstance(subnode, MappingNode):
183 raise ConstructorError("while constructing a mapping",
184 node.start_mark,
185 "expected a mapping for merging, but found %s"
186 % subnode.id, subnode.start_mark)
187 self.flatten_mapping(subnode)
188 submerge.append(subnode.value)
189 submerge.reverse()
190 for value in submerge:
191 merge.extend(value)
192 else:
193 raise ConstructorError("while constructing a mapping", node.start_mark,
194 "expected a mapping or list of mappings for merging, but found %s"
195 % value_node.id, value_node.start_mark)
196 elif key_node.tag == u'tag:yaml.org,2002:value':
197 key_node.tag = u'tag:yaml.org,2002:str'
198 index += 1
199 else:
200 index += 1
201 if merge:
202 node.value = merge + node.value
204 def construct_mapping(self, node, deep=False):
205 if isinstance(node, MappingNode):
206 self.flatten_mapping(node)
207 return BaseConstructor.construct_mapping(self, node, deep=deep)
209 def construct_yaml_null(self, node):
210 self.construct_scalar(node)
211 return None
213 bool_values = {
214 u'yes': True,
215 u'no': False,
216 u'true': True,
217 u'false': False,
218 u'on': True,
219 u'off': False,
222 def construct_yaml_bool(self, node):
223 value = self.construct_scalar(node)
224 return self.bool_values[value.lower()]
226 def construct_yaml_int(self, node):
227 value = str(self.construct_scalar(node))
228 value = value.replace('_', '')
229 sign = +1
230 if value[0] == '-':
231 sign = -1
232 if value[0] in '+-':
233 value = value[1:]
234 if value == '0':
235 return 0
236 elif value.startswith('0b'):
237 return sign*int(value[2:], 2)
238 elif value.startswith('0x'):
239 return sign*int(value[2:], 16)
240 elif value[0] == '0':
241 return sign*int(value, 8)
242 elif ':' in value:
243 digits = [int(part) for part in value.split(':')]
244 digits.reverse()
245 base = 1
246 value = 0
247 for digit in digits:
248 value += digit*base
249 base *= 60
250 return sign*value
251 else:
252 return sign*int(value)
254 inf_value = 1e300
255 while inf_value != inf_value*inf_value:
256 inf_value *= inf_value
257 nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99).
259 def construct_yaml_float(self, node):
260 value = str(self.construct_scalar(node))
261 value = value.replace('_', '').lower()
262 sign = +1
263 if value[0] == '-':
264 sign = -1
265 if value[0] in '+-':
266 value = value[1:]
267 if value == '.inf':
268 return sign*self.inf_value
269 elif value == '.nan':
270 return self.nan_value
271 elif ':' in value:
272 digits = [float(part) for part in value.split(':')]
273 digits.reverse()
274 base = 1
275 value = 0.0
276 for digit in digits:
277 value += digit*base
278 base *= 60
279 return sign*value
280 else:
281 return sign*float(value)
283 def construct_yaml_binary(self, node):
284 value = self.construct_scalar(node)
285 try:
286 return str(value).decode('base64')
287 except (binascii.Error, UnicodeEncodeError), exc:
288 raise ConstructorError(None, None,
289 "failed to decode base64 data: %s" % exc, node.start_mark)
291 timestamp_regexp = re.compile(
292 ur'''^(?P<year>[0-9][0-9][0-9][0-9])
293 -(?P<month>[0-9][0-9]?)
294 -(?P<day>[0-9][0-9]?)
295 (?:(?:[Tt]|[ \t]+)
296 (?P<hour>[0-9][0-9]?)
297 :(?P<minute>[0-9][0-9])
298 :(?P<second>[0-9][0-9])
299 (?:\.(?P<fraction>[0-9]*))?
300 (?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?)
301 (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X)
303 def construct_yaml_timestamp(self, node):
304 value = self.construct_scalar(node)
305 match = self.timestamp_regexp.match(node.value)
306 values = match.groupdict()
307 year = int(values['year'])
308 month = int(values['month'])
309 day = int(values['day'])
310 if not values['hour']:
311 return datetime.date(year, month, day)
312 hour = int(values['hour'])
313 minute = int(values['minute'])
314 second = int(values['second'])
315 fraction = 0
316 if values['fraction']:
317 fraction = int(values['fraction'][:6].ljust(6, '0'))
318 delta = None
319 if values['tz_sign']:
320 tz_hour = int(values['tz_hour'])
321 tz_minute = int(values['tz_minute'] or 0)
322 delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute)
323 if values['tz_sign'] == '-':
324 delta = -delta
325 data = datetime.datetime(year, month, day, hour, minute, second, fraction)
326 if delta:
327 data -= delta
328 return data
330 def construct_yaml_omap(self, node):
331 # Note: we do not check for duplicate keys, because it's too
332 # CPU-expensive.
333 omap = []
334 yield omap
335 if not isinstance(node, SequenceNode):
336 raise ConstructorError("while constructing an ordered map", node.start_mark,
337 "expected a sequence, but found %s" % node.id, node.start_mark)
338 for subnode in node.value:
339 if not isinstance(subnode, MappingNode):
340 raise ConstructorError("while constructing an ordered map", node.start_mark,
341 "expected a mapping of length 1, but found %s" % subnode.id,
342 subnode.start_mark)
343 if len(subnode.value) != 1:
344 raise ConstructorError("while constructing an ordered map", node.start_mark,
345 "expected a single mapping item, but found %d items" % len(subnode.value),
346 subnode.start_mark)
347 key_node, value_node = subnode.value[0]
348 key = self.construct_object(key_node)
349 value = self.construct_object(value_node)
350 omap.append((key, value))
352 def construct_yaml_pairs(self, node):
353 # Note: the same code as `construct_yaml_omap`.
354 pairs = []
355 yield pairs
356 if not isinstance(node, SequenceNode):
357 raise ConstructorError("while constructing pairs", node.start_mark,
358 "expected a sequence, but found %s" % node.id, node.start_mark)
359 for subnode in node.value:
360 if not isinstance(subnode, MappingNode):
361 raise ConstructorError("while constructing pairs", node.start_mark,
362 "expected a mapping of length 1, but found %s" % subnode.id,
363 subnode.start_mark)
364 if len(subnode.value) != 1:
365 raise ConstructorError("while constructing pairs", node.start_mark,
366 "expected a single mapping item, but found %d items" % len(subnode.value),
367 subnode.start_mark)
368 key_node, value_node = subnode.value[0]
369 key = self.construct_object(key_node)
370 value = self.construct_object(value_node)
371 pairs.append((key, value))
373 def construct_yaml_set(self, node):
374 data = set()
375 yield data
376 value = self.construct_mapping(node)
377 data.update(value)
379 def construct_yaml_str(self, node):
380 value = self.construct_scalar(node)
381 try:
382 return value.encode('ascii')
383 except UnicodeEncodeError:
384 return value
386 def construct_yaml_seq(self, node):
387 data = []
388 yield data
389 data.extend(self.construct_sequence(node))
391 def construct_yaml_map(self, node):
392 data = {}
393 yield data
394 value = self.construct_mapping(node)
395 data.update(value)
397 def construct_yaml_object(self, node, cls):
398 data = cls.__new__(cls)
399 yield data
400 if hasattr(data, '__setstate__'):
401 state = self.construct_mapping(node, deep=True)
402 data.__setstate__(state)
403 else:
404 state = self.construct_mapping(node)
405 data.__dict__.update(state)
407 def construct_undefined(self, node):
408 raise ConstructorError(None, None,
409 "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'),
410 node.start_mark)
# Register the SafeConstructor methods for the standard YAML tags, in this
# order.  The last entry, keyed by None, is the catch-all constructor.
for _tag, _constructor in [
        (u'tag:yaml.org,2002:null', SafeConstructor.construct_yaml_null),
        (u'tag:yaml.org,2002:bool', SafeConstructor.construct_yaml_bool),
        (u'tag:yaml.org,2002:int', SafeConstructor.construct_yaml_int),
        (u'tag:yaml.org,2002:float', SafeConstructor.construct_yaml_float),
        (u'tag:yaml.org,2002:binary', SafeConstructor.construct_yaml_binary),
        (u'tag:yaml.org,2002:timestamp',
            SafeConstructor.construct_yaml_timestamp),
        (u'tag:yaml.org,2002:omap', SafeConstructor.construct_yaml_omap),
        (u'tag:yaml.org,2002:pairs', SafeConstructor.construct_yaml_pairs),
        (u'tag:yaml.org,2002:set', SafeConstructor.construct_yaml_set),
        (u'tag:yaml.org,2002:str', SafeConstructor.construct_yaml_str),
        (u'tag:yaml.org,2002:seq', SafeConstructor.construct_yaml_seq),
        (u'tag:yaml.org,2002:map', SafeConstructor.construct_yaml_map),
        (None, SafeConstructor.construct_undefined),
        ]:
    SafeConstructor.add_constructor(_tag, _constructor)
# Do not leave the loop variables behind in the module namespace.
del _tag, _constructor
463 class Constructor(SafeConstructor):
465 def construct_python_str(self, node):
466 return self.construct_scalar(node).encode('utf-8')
468 def construct_python_unicode(self, node):
469 return self.construct_scalar(node)
471 def construct_python_long(self, node):
472 return long(self.construct_yaml_int(node))
474 def construct_python_complex(self, node):
475 return complex(self.construct_scalar(node))
477 def construct_python_tuple(self, node):
478 return tuple(self.construct_sequence(node))
480 def find_python_module(self, name, mark):
481 if not name:
482 raise ConstructorError("while constructing a Python module", mark,
483 "expected non-empty name appended to the tag", mark)
484 try:
485 __import__(name)
486 except ImportError, exc:
487 raise ConstructorError("while constructing a Python module", mark,
488 "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark)
489 return sys.modules[name]
491 def find_python_name(self, name, mark):
492 if not name:
493 raise ConstructorError("while constructing a Python object", mark,
494 "expected non-empty name appended to the tag", mark)
495 if u'.' in name:
496 # Python 2.4 only
497 #module_name, object_name = name.rsplit('.', 1)
498 items = name.split('.')
499 object_name = items.pop()
500 module_name = '.'.join(items)
501 else:
502 module_name = '__builtin__'
503 object_name = name
504 try:
505 __import__(module_name)
506 except ImportError, exc:
507 raise ConstructorError("while constructing a Python object", mark,
508 "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark)
509 module = sys.modules[module_name]
510 if not hasattr(module, object_name):
511 raise ConstructorError("while constructing a Python object", mark,
512 "cannot find %r in the module %r" % (object_name.encode('utf-8'),
513 module.__name__), mark)
514 return getattr(module, object_name)
516 def construct_python_name(self, suffix, node):
517 value = self.construct_scalar(node)
518 if value:
519 raise ConstructorError("while constructing a Python name", node.start_mark,
520 "expected the empty value, but found %r" % value.encode('utf-8'),
521 node.start_mark)
522 return self.find_python_name(suffix, node.start_mark)
524 def construct_python_module(self, suffix, node):
525 value = self.construct_scalar(node)
526 if value:
527 raise ConstructorError("while constructing a Python module", node.start_mark,
528 "expected the empty value, but found %r" % value.encode('utf-8'),
529 node.start_mark)
530 return self.find_python_module(suffix, node.start_mark)
532 class classobj: pass
534 def make_python_instance(self, suffix, node,
535 args=None, kwds=None, newobj=False):
536 if not args:
537 args = []
538 if not kwds:
539 kwds = {}
540 cls = self.find_python_name(suffix, node.start_mark)
541 if newobj and isinstance(cls, type(self.classobj)) \
542 and not args and not kwds:
543 instance = self.classobj()
544 instance.__class__ = cls
545 return instance
546 elif newobj and isinstance(cls, type):
547 return cls.__new__(cls, *args, **kwds)
548 else:
549 return cls(*args, **kwds)
551 def set_python_instance_state(self, instance, state):
552 if hasattr(instance, '__setstate__'):
553 instance.__setstate__(state)
554 else:
555 slotstate = {}
556 if isinstance(state, tuple) and len(state) == 2:
557 state, slotstate = state
558 if hasattr(instance, '__dict__'):
559 instance.__dict__.update(state)
560 elif state:
561 slotstate.update(state)
562 for key, value in slotstate.items():
563 setattr(object, key, value)
565 def construct_python_object(self, suffix, node):
566 # Format:
567 # !!python/object:module.name { ... state ... }
568 instance = self.make_python_instance(suffix, node, newobj=True)
569 yield instance
570 deep = hasattr(instance, '__setstate__')
571 state = self.construct_mapping(node, deep=deep)
572 self.set_python_instance_state(instance, state)
574 def construct_python_object_apply(self, suffix, node, newobj=False):
575 # Format:
576 # !!python/object/apply # (or !!python/object/new)
577 # args: [ ... arguments ... ]
578 # kwds: { ... keywords ... }
579 # state: ... state ...
580 # listitems: [ ... listitems ... ]
581 # dictitems: { ... dictitems ... }
582 # or short format:
583 # !!python/object/apply [ ... arguments ... ]
584 # The difference between !!python/object/apply and !!python/object/new
585 # is how an object is created, check make_python_instance for details.
586 if isinstance(node, SequenceNode):
587 args = self.construct_sequence(node, deep=True)
588 kwds = {}
589 state = {}
590 listitems = []
591 dictitems = {}
592 else:
593 value = self.construct_mapping(node, deep=True)
594 args = value.get('args', [])
595 kwds = value.get('kwds', {})
596 state = value.get('state', {})
597 listitems = value.get('listitems', [])
598 dictitems = value.get('dictitems', {})
599 instance = self.make_python_instance(suffix, node, args, kwds, newobj)
600 if state:
601 self.set_python_instance_state(instance, state)
602 if listitems:
603 instance.extend(listitems)
604 if dictitems:
605 for key in dictitems:
606 instance[key] = dictitems[key]
607 return instance
609 def construct_python_object_new(self, suffix, node):
610 return self.construct_python_object_apply(suffix, node, newobj=True)
# Register the constructors for the exact `python/...` tags, in this order.
for _tag, _constructor in [
        (u'tag:yaml.org,2002:python/none', Constructor.construct_yaml_null),
        (u'tag:yaml.org,2002:python/bool', Constructor.construct_yaml_bool),
        (u'tag:yaml.org,2002:python/str', Constructor.construct_python_str),
        (u'tag:yaml.org,2002:python/unicode',
            Constructor.construct_python_unicode),
        (u'tag:yaml.org,2002:python/int', Constructor.construct_yaml_int),
        (u'tag:yaml.org,2002:python/long', Constructor.construct_python_long),
        (u'tag:yaml.org,2002:python/float', Constructor.construct_yaml_float),
        (u'tag:yaml.org,2002:python/complex',
            Constructor.construct_python_complex),
        (u'tag:yaml.org,2002:python/list', Constructor.construct_yaml_seq),
        (u'tag:yaml.org,2002:python/tuple',
            Constructor.construct_python_tuple),
        (u'tag:yaml.org,2002:python/dict', Constructor.construct_yaml_map),
        ]:
    Constructor.add_constructor(_tag, _constructor)

# Register the constructors selected by tag prefix; each receives the rest
# of the tag as its `suffix` argument.
for _tag, _constructor in [
        (u'tag:yaml.org,2002:python/name:',
            Constructor.construct_python_name),
        (u'tag:yaml.org,2002:python/module:',
            Constructor.construct_python_module),
        (u'tag:yaml.org,2002:python/object:',
            Constructor.construct_python_object),
        (u'tag:yaml.org,2002:python/object/apply:',
            Constructor.construct_python_object_apply),
        (u'tag:yaml.org,2002:python/object/new:',
            Constructor.construct_python_object_new),
        ]:
    Constructor.add_multi_constructor(_tag, _constructor)
# Do not leave the loop variables behind in the module namespace.
del _tag, _constructor