Subclass all base classes from `object`.
[pyyaml/python3.git] / lib / yaml / emitter.py
blobd9d1bf819de0c16b6df1399b01b28afb1174da3d
2 # Emitter expects events obeying the following grammar:
3 # stream ::= STREAM-START document* STREAM-END
4 # document ::= DOCUMENT-START node DOCUMENT-END
5 # node ::= SCALAR | sequence | mapping
6 # sequence ::= SEQUENCE-START node* SEQUENCE-END
7 # mapping ::= MAPPING-START (node node)* MAPPING-END
9 __all__ = ['Emitter', 'EmitterError']
11 from error import YAMLError
12 from events import *
14 import re
class EmitterError(YAMLError):
    """Error raised by the emitter for unexpected events or invalid input."""
class ScalarAnalysis(object):
    """Plain record holding a scalar and the styles it may be written in."""

    def __init__(self, scalar, empty, multiline,
            allow_flow_plain, allow_block_plain,
            allow_single_quoted, allow_double_quoted,
            allow_block):
        # The analysed text and its basic shape.
        self.scalar, self.empty, self.multiline = scalar, empty, multiline
        # Plain (unquoted) style permissions, per context.
        self.allow_flow_plain, self.allow_block_plain = \
                allow_flow_plain, allow_block_plain
        # Quoted style permissions.
        self.allow_single_quoted, self.allow_double_quoted = \
                allow_single_quoted, allow_double_quoted
        # Block (literal/folded) style permission.
        self.allow_block = allow_block
33 class Emitter(object):
# Tag prefixes every document starts with, mapped to the handle that
# abbreviates them (prefix -> handle).
DEFAULT_TAG_PREFIXES = {
    u'!' : u'!',
    u'tag:yaml.org,2002:' : u'!!',
}
def __init__(self, stream, canonical=None, indent=None, width=None,
        allow_unicode=None, line_break=None):
    """Set up an emitter that writes to `stream`.

    `indent` is honoured only when 1 < indent < 10 (default 2), `width`
    only when it exceeds twice the chosen indent (default 80), and
    `line_break` only when it is one of '\\r', '\\n' or '\\r\\n'
    (default '\\n').
    """
    # Output target: must provide `write`; `flush` is used when present.
    self.stream = stream

    # Stays None unless STREAM-START supplies an encoding.
    self.encoding = None

    # State machine: `state` handles the next event, `states` holds the
    # handlers to return to once a nested structure is finished.
    self.states = []
    self.state = self.expect_stream_start

    # Queue of pending events and the event being processed.
    self.events = []
    self.event = None

    # Current indentation and the stack of enclosing indentations.
    self.indents = []
    self.indent = None

    # Nesting depth of flow collections.
    self.flow_level = 0

    # Context flags describing where the current node sits.
    self.root_context = False
    self.sequence_context = False
    self.mapping_context = False
    self.simple_key_context = False

    # Output position and the nature of the last character written:
    # whether it was whitespace, and whether it was an indention
    # character (indentation space, '-', '?', or ':').
    self.line = 0
    self.column = 0
    self.whitespace = True
    self.indention = True

    # Formatting preferences.
    self.canonical = canonical
    self.allow_unicode = allow_unicode

    chosen_indent = 2
    if indent and 1 < indent < 10:
        chosen_indent = indent
    self.best_indent = chosen_indent

    chosen_width = 80
    if width and width > chosen_indent*2:
        chosen_width = width
    self.best_width = chosen_width

    chosen_break = u'\n'
    if line_break in [u'\r', u'\n', u'\r\n']:
        chosen_break = line_break
    self.best_line_break = chosen_break

    # Prefix -> handle table; filled in at each DOCUMENT-START.
    self.tag_prefixes = None

    # Anchor and tag text prepared ahead of writing.
    self.prepared_anchor = None
    self.prepared_tag = None

    # Cached scalar analysis and chosen style for the current scalar.
    self.analysis = None
    self.style = None
def emit(self, event):
    """Queue `event` and process queued events while enough are available."""
    self.events.append(event)
    while True:
        # Some events need lookahead; stop until more have arrived.
        if self.need_more_events():
            return
        self.event = self.events.pop(0)
        self.state()
        self.event = None
112 # In some cases, we wait for a few next events before emitting.
def need_more_events(self):
    """Tell whether the head of the queue must wait for further events.

    Document, sequence and mapping starts need 1, 2 and 3 events of
    lookahead respectively; any other event can be processed at once.
    """
    if not self.events:
        return True
    head = self.events[0]
    for kind, lookahead in ((DocumentStartEvent, 1),
            (SequenceStartEvent, 2), (MappingStartEvent, 3)):
        if isinstance(head, kind):
            return self.need_events(lookahead)
    return False
def need_events(self, count):
    """Return True while fewer than `count` events follow the queue head.

    Returns False early once the structure opened by the head event has
    been closed (or the stream ended), since no more lookahead can come.
    """
    depth = 0
    for pending in self.events[1:]:
        if isinstance(pending, (DocumentStartEvent, CollectionStartEvent)):
            depth += 1
        elif isinstance(pending, (DocumentEndEvent, CollectionEndEvent)):
            depth -= 1
        elif isinstance(pending, StreamEndEvent):
            depth = -1
        if depth < 0:
            return False
    return len(self.events) <= count
def increase_indent(self, flow=False, indentless=False):
    """Push the current indent and move one level deeper.

    The first level starts at `best_indent` for flow context and 0
    otherwise; deeper levels add `best_indent` unless `indentless`.
    """
    previous = self.indent
    self.indents.append(previous)
    if previous is not None:
        if not indentless:
            self.indent += self.best_indent
        return
    if flow:
        self.indent = self.best_indent
    else:
        self.indent = 0
150 # States.
152 # Stream handlers.
def expect_stream_start(self):
    """Handle STREAM-START: adopt its encoding and begin the stream."""
    if not isinstance(self.event, StreamStartEvent):
        raise EmitterError("expected StreamStartEvent, but got %s"
                % self.event)
    requested = self.event.encoding
    if requested:
        self.encoding = requested
    self.write_stream_start()
    self.state = self.expect_first_document_start
def expect_nothing(self):
    """Reject any event; this is the state entered after STREAM-END."""
    message = "expected nothing, but got %s" % self.event
    raise EmitterError(message)
167 # Document handlers.
def expect_first_document_start(self):
    """Handle the first DOCUMENT-START (or STREAM-END) of the stream."""
    outcome = self.expect_document_start(first=True)
    return outcome
def expect_document_start(self, first=False):
    """Handle DOCUMENT-START, or STREAM-END when no document follows.

    Writes the %YAML and %TAG directives carried by the event, resets the
    tag prefix table, and writes the '---' marker unless the document may
    start implicitly.  An implicit start requires the first document of
    the stream, no explicit flag, no canonical mode, no directives, and a
    non-empty document.

    Raises EmitterError for any other event.
    """
    if isinstance(self.event, DocumentStartEvent):
        if self.event.version:
            version_text = self.prepare_version(self.event.version)
            self.write_version_directive(version_text)
        self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
        if self.event.tags:
            # sorted() gives a stable directive order and, unlike
            # keys() followed by .sort(), does not require keys() to
            # return a list.
            for handle in sorted(self.event.tags.keys()):
                prefix = self.event.tags[handle]
                # The table is keyed by prefix for lookup in prepare_tag.
                self.tag_prefixes[prefix] = handle
                handle_text = self.prepare_tag_handle(handle)
                prefix_text = self.prepare_tag_prefix(prefix)
                self.write_tag_directive(handle_text, prefix_text)
        implicit = (first and not self.event.explicit and not self.canonical
                and not self.event.version and not self.event.tags
                and not self.check_empty_document())
        if not implicit:
            self.write_indent()
            self.write_indicator(u'---', True)
            if self.canonical:
                self.write_indent()
        self.state = self.expect_document_root
    elif isinstance(self.event, StreamEndEvent):
        self.write_stream_end()
        self.state = self.expect_nothing
    else:
        raise EmitterError("expected DocumentStartEvent, but got %s"
                % self.event)
def expect_document_end(self):
    """Handle DOCUMENT-END: close the document and flush the stream."""
    if not isinstance(self.event, DocumentEndEvent):
        raise EmitterError("expected DocumentEndEvent, but got %s"
                % self.event)
    self.write_indent()
    if self.event.explicit:
        # Explicit end marker on a line of its own.
        self.write_indicator(u'...', True)
        self.write_indent()
    self.flush_stream()
    self.state = self.expect_document_start
def expect_document_root(self):
    """Emit the root node; DOCUMENT-END is expected once it is done."""
    after_root = self.expect_document_end
    self.states.append(after_root)
    self.expect_node(root=True)
219 # Node handlers.
def expect_node(self, root=False, sequence=False, mapping=False,
        simple_key=False):
    """Record the node's context and dispatch on the current event.

    Aliases go to expect_alias.  Scalars and collections get their anchor
    and tag written first; collections are emitted in flow style when
    already inside a flow collection, in canonical mode, when the event
    asks for it, or when the collection is empty.

    Raises EmitterError when the event is not a node event.
    """
    self.root_context = root
    self.sequence_context = sequence
    self.mapping_context = mapping
    self.simple_key_context = simple_key

    event = self.event
    if isinstance(event, AliasEvent):
        self.expect_alias()
        return
    if not isinstance(event, (ScalarEvent, CollectionStartEvent)):
        raise EmitterError("expected NodeEvent, but got %s" % self.event)

    self.process_anchor(u'&')
    self.process_tag()
    if isinstance(self.event, ScalarEvent):
        self.expect_scalar()
    elif isinstance(self.event, SequenceStartEvent):
        if self.flow_level or self.canonical or self.event.flow_style \
                or self.check_empty_sequence():
            self.expect_flow_sequence()
        else:
            self.expect_block_sequence()
    elif isinstance(self.event, MappingStartEvent):
        if self.flow_level or self.canonical or self.event.flow_style \
                or self.check_empty_mapping():
            self.expect_flow_mapping()
        else:
            self.expect_block_mapping()
def expect_alias(self):
    """Write an alias ('*anchor') and return to the previous state.

    Raises EmitterError when the alias event carries no anchor.
    """
    if self.event.anchor is not None:
        self.process_anchor(u'*')
        self.state = self.states.pop()
        return
    raise EmitterError("anchor is not specified for alias")
def expect_scalar(self):
    """Write the current scalar one indent level deeper, then unwind."""
    self.increase_indent(flow=True)
    self.process_scalar()
    # Restore the enclosing indent and resume the suspended handler.
    self.indent, self.state = self.indents.pop(), self.states.pop()
261 # Flow sequence handlers.
def expect_flow_sequence(self):
    """Open a flow sequence: write '[' and enter one more flow level."""
    self.write_indicator(u'[', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_sequence_item
def expect_first_flow_sequence_item(self):
    """Handle the first item of a flow sequence, or its immediate end."""
    if not isinstance(self.event, SequenceEndEvent):
        if self.canonical or self.column > self.best_width:
            self.write_indent()
        self.states.append(self.expect_flow_sequence_item)
        self.expect_node(sequence=True)
        return
    # Empty sequence: close it right away.
    self.indent = self.indents.pop()
    self.flow_level -= 1
    self.write_indicator(u']', False)
    self.state = self.states.pop()
def expect_flow_sequence_item(self):
    """Handle a subsequent flow sequence item, or the end of the sequence."""
    if not isinstance(self.event, SequenceEndEvent):
        self.write_indicator(u',', False)
        if self.canonical or self.column > self.best_width:
            self.write_indent()
        self.states.append(self.expect_flow_sequence_item)
        self.expect_node(sequence=True)
        return
    self.indent = self.indents.pop()
    self.flow_level -= 1
    if self.canonical:
        # Canonical output ends with a trailing comma and puts ']' on
        # its own line.
        self.write_indicator(u',', False)
        self.write_indent()
    self.write_indicator(u']', False)
    self.state = self.states.pop()
297 # Flow mapping handlers.
def expect_flow_mapping(self):
    """Open a flow mapping: write '{' and enter one more flow level."""
    self.write_indicator(u'{', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_mapping_key
def expect_first_flow_mapping_key(self):
    """Handle the first key of a flow mapping, or its immediate end."""
    if isinstance(self.event, MappingEndEvent):
        # Empty mapping: close it right away.
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator(u'}', False)
        self.state = self.states.pop()
        return
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if self.canonical or not self.check_simple_key():
        # Complex key: written with an explicit '?' indicator.
        self.write_indicator(u'?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
def expect_flow_mapping_key(self):
    """Handle a subsequent flow mapping key, or the end of the mapping."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        if self.canonical:
            # Canonical output ends with a trailing comma and puts '}'
            # on its own line.
            self.write_indicator(u',', False)
            self.write_indent()
        self.write_indicator(u'}', False)
        self.state = self.states.pop()
        return
    self.write_indicator(u',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if self.canonical or not self.check_simple_key():
        # Complex key: written with an explicit '?' indicator.
        self.write_indicator(u'?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
def expect_flow_mapping_simple_value(self):
    """Write ':' directly after a simple key and emit the value node."""
    self.write_indicator(u':', False)
    next_handler = self.expect_flow_mapping_key
    self.states.append(next_handler)
    self.expect_node(mapping=True)
def expect_flow_mapping_value(self):
    """Write ':' after a complex key and emit the value node."""
    too_wide = self.column > self.best_width
    if self.canonical or too_wide:
        self.write_indent()
    self.write_indicator(u':', True)
    next_handler = self.expect_flow_mapping_key
    self.states.append(next_handler)
    self.expect_node(mapping=True)
355 # Block sequence handlers.
def expect_block_sequence(self):
    """Open a block sequence and wait for its first item.

    A sequence that is a mapping value and does not follow an indention
    character is started without increasing the indent.
    """
    self.increase_indent(flow=False,
            indentless=(self.mapping_context and not self.indention))
    self.state = self.expect_first_block_sequence_item
def expect_first_block_sequence_item(self):
    """Handle the first item of a block sequence."""
    outcome = self.expect_block_sequence_item(first=True)
    return outcome
def expect_block_sequence_item(self, first=False):
    """Write a '-' entry for the next item, or close the sequence.

    The end-of-sequence check is skipped for the first item.
    """
    if not first and isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    self.write_indicator(u'-', True, indention=True)
    self.states.append(self.expect_block_sequence_item)
    self.expect_node(sequence=True)
375 # Block mapping handlers.
def expect_block_mapping(self):
    """Open a block mapping and wait for its first key."""
    first_key_handler = self.expect_first_block_mapping_key
    self.increase_indent(flow=False)
    self.state = first_key_handler
def expect_first_block_mapping_key(self):
    """Handle the first key of a block mapping."""
    outcome = self.expect_block_mapping_key(first=True)
    return outcome
def expect_block_mapping_key(self, first=False):
    """Emit the next block mapping key, or close the mapping.

    The end-of-mapping check is skipped for the first key.  Keys that do
    not qualify as simple keys are introduced with '?'.
    """
    if not first and isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    if not self.check_simple_key():
        self.write_indicator(u'?', True, indention=True)
        self.states.append(self.expect_block_mapping_value)
        self.expect_node(mapping=True)
        return
    self.states.append(self.expect_block_mapping_simple_value)
    self.expect_node(mapping=True, simple_key=True)
def expect_block_mapping_simple_value(self):
    """Write ':' directly after a simple key and emit the value node."""
    self.write_indicator(u':', False)
    next_handler = self.expect_block_mapping_key
    self.states.append(next_handler)
    self.expect_node(mapping=True)
def expect_block_mapping_value(self):
    """Write ':' on a fresh indented line after a complex key."""
    self.write_indent()
    self.write_indicator(u':', True, indention=True)
    next_handler = self.expect_block_mapping_key
    self.states.append(next_handler)
    self.expect_node(mapping=True)
409 # Checkers.
def check_empty_sequence(self):
    """Truthy when the current SEQUENCE-START is followed by its end."""
    if not isinstance(self.event, SequenceStartEvent):
        return False
    return (self.events
            and isinstance(self.events[0], SequenceEndEvent))
def check_empty_mapping(self):
    """Truthy when the current MAPPING-START is followed by its end."""
    if not isinstance(self.event, MappingStartEvent):
        return False
    return (self.events
            and isinstance(self.events[0], MappingEndEvent))
def check_empty_document(self):
    """Truthy when the document consists of one bare, empty scalar.

    Only meaningful while a DOCUMENT-START is the current event and its
    successor is already queued; otherwise returns False.
    """
    if not (isinstance(self.event, DocumentStartEvent) and self.events):
        return False
    upcoming = self.events[0]
    return (isinstance(upcoming, ScalarEvent) and upcoming.anchor is None
            and upcoming.tag is None and upcoming.implicit
            and upcoming.value == u'')
def check_simple_key(self):
    """Decide whether the current node may be written as a simple key.

    The combined length of its prepared anchor, prepared tag and scalar
    text must stay under 128, and the node must be an alias, a non-empty
    single-line scalar, or an empty collection.  Prepared anchor/tag and
    the scalar analysis are cached on the emitter as a side effect.
    """
    event = self.event
    size = 0
    if isinstance(event, NodeEvent) and event.anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(event.anchor)
        size += len(self.prepared_anchor)
    if isinstance(event, (ScalarEvent, CollectionStartEvent)) \
            and event.tag is not None:
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(event.tag)
        size += len(self.prepared_tag)
    if isinstance(event, ScalarEvent):
        if self.analysis is None:
            self.analysis = self.analyze_scalar(event.value)
        size += len(self.analysis.scalar)
    if not size < 128:
        return False
    return (isinstance(self.event, AliasEvent)
        or (isinstance(self.event, ScalarEvent)
                and not self.analysis.empty and not self.analysis.multiline)
        or self.check_empty_sequence() or self.check_empty_mapping())
446 # Anchor, Tag, and Scalar processors.
def process_anchor(self, indicator):
    """Write the event's anchor prefixed by `indicator` ('&' or '*').

    Any prepared anchor is cleared afterwards, whether or not the event
    had one.
    """
    anchor = self.event.anchor
    if anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(anchor)
        if self.prepared_anchor:
            self.write_indicator(indicator+self.prepared_anchor, True)
    self.prepared_anchor = None
def process_tag(self):
    """Write the tag of the current node unless it may be left out.

    For scalars the style is chosen first: the tag is omitted when the
    matching `implicit` flag is set (index 0 for plain style, index 1
    otherwise), and an untagged scalar that is implicit in plain style
    but not written plain gets the non-specific tag '!'.  For collections
    the tag is omitted when the event is implicit.  Omission never
    happens in canonical mode if a tag is available.

    Raises EmitterError when a tag is required but missing.
    """
    event = self.event
    tag = event.tag
    may_omit = (not self.canonical or tag is None)
    if isinstance(event, ScalarEvent):
        if self.style is None:
            self.style = self.choose_scalar_style()
        if may_omit:
            if self.style == '':
                implied = event.implicit[0]
            else:
                implied = event.implicit[1]
            if implied:
                self.prepared_tag = None
                return
        if event.implicit[0] and tag is None:
            tag = u'!'
            self.prepared_tag = None
    elif may_omit and event.implicit:
        self.prepared_tag = None
        return
    if tag is None:
        raise EmitterError("tag is not specified")
    if self.prepared_tag is None:
        self.prepared_tag = self.prepare_tag(tag)
    if self.prepared_tag:
        self.write_indicator(self.prepared_tag, True)
    self.prepared_tag = None
def choose_scalar_style(self):
    """Pick the style for the current scalar.

    Returns '' (plain), "'", '"', '|' or '>'.  The requested style is
    used when the scalar analysis and the current context allow it;
    double quotes are the fallback and are forced in canonical mode.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    info = self.analysis
    requested = self.event.style
    in_key = self.simple_key_context

    if requested == '"' or self.canonical:
        return '"'

    # Plain style: no explicit style requested and the tag is implicit.
    if not requested and self.event.implicit[0]:
        unfit_key = in_key and (info.empty or info.multiline)
        if self.flow_level:
            plain_ok = info.allow_flow_plain
        else:
            plain_ok = info.allow_block_plain
        if not unfit_key and plain_ok:
            return ''

    # Block styles are unavailable inside flow collections and keys.
    if requested and requested in '|>':
        if not (self.flow_level or in_key) and info.allow_block:
            return requested

    if not requested or requested == '\'':
        if info.allow_single_quoted and not (in_key and info.multiline):
            return '\''

    return '"'
def process_scalar(self):
    """Write the current scalar in its chosen style, then reset the cache."""
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.style is None:
        self.style = self.choose_scalar_style()
    text = self.analysis.scalar
    style = self.style
    # Simple keys must stay on one line, so they are never split.
    split = (not self.simple_key_context)
    if style == '"':
        self.write_double_quoted(text, split)
    elif style == '\'':
        self.write_single_quoted(text, split)
    elif style == '>':
        self.write_folded(text)
    elif style == '|':
        self.write_literal(text)
    else:
        self.write_plain(text, split)
    self.analysis = None
    self.style = None
526 # Analyzers.
def prepare_version(self, version):
    """Format a (major, minor) pair as 'major.minor'.

    Raises EmitterError unless the major version is 1.
    """
    major, minor = version
    if major == 1:
        return u'%d.%d' % (major, minor)
    raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
def prepare_tag_handle(self, handle):
    """Validate a tag handle and return it unchanged.

    A handle must be non-empty, start and end with '!', and contain only
    ASCII letters, digits, '-' or '_' in between; otherwise EmitterError
    is raised.
    """
    if not handle:
        raise EmitterError("tag handle must not be empty")
    if not (handle[0] == u'!' and handle[-1] == u'!'):
        raise EmitterError("tag handle must start and end with '!': %r"
                % (handle.encode('utf-8')))
    for ch in handle[1:-1]:
        if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
                or ch in u'-_':
            continue
        raise EmitterError("invalid character %r in the tag handle: %r"
                % (ch.encode('utf-8'), handle.encode('utf-8')))
    return handle
def prepare_tag_prefix(self, prefix):
    """Return `prefix` with disallowed characters percent-escaped.

    Characters outside the permitted set are replaced by the %XX escapes
    of their UTF-8 bytes; a leading '!' is always kept.  Raises
    EmitterError for an empty prefix.
    """
    if not prefix:
        raise EmitterError("tag prefix must not be empty")
    pieces = []
    # text[begin:pos] is the pending run of characters copied verbatim.
    begin = pos = 0
    if prefix[0] == u'!':
        pos = 1
    while pos < len(prefix):
        ch = prefix[pos]
        if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
                or ch in u'-;/?!:@&=+$,_.~*\'()[]':
            pos += 1
            continue
        if begin < pos:
            pieces.append(prefix[begin:pos])
        begin = pos = pos+1
        for octet in ch.encode('utf-8'):
            pieces.append(u'%%%02X' % ord(octet))
    if begin < pos:
        pieces.append(prefix[begin:pos])
    return u''.join(pieces)
def prepare_tag(self, tag):
    """Return the text used to write `tag`.

    The tag is abbreviated with a handle when one of the prefixes in
    `self.tag_prefixes` matches; otherwise it is written verbatim as
    '!<...>'.  Characters not allowed in a tag are replaced by the %XX
    escapes of their UTF-8 bytes.  The bare tag '!' is returned as is.

    Raises EmitterError for an empty tag.
    """
    if not tag:
        raise EmitterError("tag must not be empty")
    if tag == u'!':
        return tag
    handle = None
    suffix = tag
    # The last matching prefix wins.  Iterating in sorted order makes the
    # result independent of dict ordering: all prefixes matching one tag
    # are prefixes of each other, and a longer one sorts after a shorter
    # one, so the most specific prefix is chosen.
    for prefix in sorted(self.tag_prefixes):
        if tag.startswith(prefix) \
                and (prefix == u'!' or len(prefix) < len(tag)):
            handle = self.tag_prefixes[prefix]
            suffix = tag[len(prefix):]
    chunks = []
    # suffix[start:end] is the pending run of characters copied verbatim.
    start = end = 0
    while end < len(suffix):
        ch = suffix[end]
        # '!' may stay unescaped except after the primary handle '!'.
        if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
                or ch in u'-;/?:@&=+$,_.~*\'()[]' \
                or (ch == u'!' and handle != u'!'):
            end += 1
        else:
            if start < end:
                chunks.append(suffix[start:end])
            start = end = end+1
            data = ch.encode('utf-8')
            for ch in data:
                chunks.append(u'%%%02X' % ord(ch))
    if start < end:
        chunks.append(suffix[start:end])
    suffix_text = u''.join(chunks)
    if handle:
        return u'%s%s' % (handle, suffix_text)
    else:
        return u'!<%s>' % suffix_text
def prepare_anchor(self, anchor):
    """Validate an anchor name and return it unchanged.

    An anchor must be non-empty and contain only ASCII letters, digits,
    '-' or '_'; otherwise EmitterError is raised.
    """
    if not anchor:
        raise EmitterError("anchor must not be empty")
    for ch in anchor:
        if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
                or ch in u'-_':
            continue
        raise EmitterError("invalid character %r in the anchor: %r"
                % (ch.encode('utf-8'), anchor.encode('utf-8')))
    return anchor
def analyze_scalar(self, scalar):
    """Inspect `scalar` and report which output styles can represent it.

    Scans the text once, noting indicator characters, line breaks,
    characters that need escaping (which depends on `self.allow_unicode`),
    and how runs of spaces and line breaks are arranged.  Returns a
    ScalarAnalysis whose allow_* flags tell the caller which of the
    plain, quoted and block styles are permitted.
    """

    # Empty scalar is a special case.
    if not scalar:
        return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                allow_flow_plain=False, allow_block_plain=True,
                allow_single_quoted=True, allow_double_quoted=True,
                allow_block=False)

    # Indicators and special characters.
    block_indicators = False
    flow_indicators = False
    line_breaks = False
    special_characters = False

    # Whitespaces.
    inline_spaces = False          # non-space space+ non-space
    inline_breaks = False          # non-space break+ non-space
    leading_spaces = False         # ^ space+ (non-space | $)
    leading_breaks = False         # ^ break+ (non-space | $)
    trailing_spaces = False        # (^ | non-space) space+ $
    trailing_breaks = False        # (^ | non-space) break+ $
    inline_breaks_spaces = False   # non-space break+ space+ non-space
    mixed_breaks_spaces = False    # anything else

    # Check document indicators.
    if scalar.startswith(u'---') or scalar.startswith(u'...'):
        block_indicators = True
        flow_indicators = True

    # First character or preceded by a whitespace.
    preceeded_by_space = True

    # Last character or followed by a whitespace.
    followed_by_space = (len(scalar) == 1 or
            scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')

    # The current series of whitespaces contain plain spaces.
    spaces = False

    # The current series of whitespaces contain line breaks.
    breaks = False

    # The current series of whitespaces contain a space followed by a
    # break.
    mixed = False

    # The current series of whitespaces start at the beginning of the
    # scalar.
    leading = False

    index = 0
    while index < len(scalar):
        ch = scalar[index]

        # Check for indicators.

        if index == 0:
            # Leading indicators are special characters.
            if ch in u'#,[]{}&*!|>\'\"%@`':
                flow_indicators = True
                block_indicators = True
            if ch in u'?:':
                flow_indicators = True
                if followed_by_space:
                    block_indicators = True
            if ch == u'-' and followed_by_space:
                flow_indicators = True
                block_indicators = True
        else:
            # Some indicators cannot appear within a scalar as well.
            if ch in u',?[]{}':
                flow_indicators = True
            if ch == u':':
                flow_indicators = True
                if followed_by_space:
                    block_indicators = True
            if ch == u'#' and preceeded_by_space:
                flow_indicators = True
                block_indicators = True

        # Check for line breaks, special, and unicode characters.

        if ch in u'\n\x85\u2028\u2029':
            line_breaks = True
        if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
            if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
                    or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
                # Printable non-ASCII: needs escaping only when unicode
                # output is not allowed.
                if not self.allow_unicode:
                    special_characters = True
            else:
                special_characters = True

        # Spaces, line breaks, and how they are mixed. State machine.

        # Start or continue series of whitespaces.
        if ch in u' \n\x85\u2028\u2029':
            if spaces and breaks:
                if ch != u' ':      # break+ (space+ break+)    => mixed
                    mixed = True
            elif spaces:
                if ch != u' ':      # (space+ break+)   => mixed
                    breaks = True
                    mixed = True
            elif breaks:
                if ch == u' ':      # break+ space+
                    spaces = True
            else:
                leading = (index == 0)
                if ch == u' ':      # space+
                    spaces = True
                else:               # break+
                    breaks = True

        # Series of whitespaces ended with a non-space.
        elif spaces or breaks:
            if leading:
                if spaces and breaks:
                    mixed_breaks_spaces = True
                elif spaces:
                    leading_spaces = True
                elif breaks:
                    leading_breaks = True
            else:
                if mixed:
                    mixed_breaks_spaces = True
                elif spaces and breaks:
                    inline_breaks_spaces = True
                elif spaces:
                    inline_spaces = True
                elif breaks:
                    inline_breaks = True
            spaces = breaks = mixed = leading = False

        # Series of whitespaces reach the end.
        if (spaces or breaks) and (index == len(scalar)-1):
            if spaces and breaks:
                mixed_breaks_spaces = True
            elif spaces:
                trailing_spaces = True
                if leading:
                    leading_spaces = True
            elif breaks:
                trailing_breaks = True
                if leading:
                    leading_breaks = True
            spaces = breaks = mixed = leading = False

        # Prepare for the next character.
        index += 1
        preceeded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
        followed_by_space = (index+1 >= len(scalar) or
                scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')

    # Let's decide what styles are allowed.
    allow_flow_plain = True
    allow_block_plain = True
    allow_single_quoted = True
    allow_double_quoted = True
    allow_block = True

    # Leading and trailing whitespace are bad for plain scalars. We also
    # do not want to mess with leading whitespaces for block scalars.
    if leading_spaces or leading_breaks or trailing_spaces:
        allow_flow_plain = allow_block_plain = allow_block = False

    # Trailing breaks are fine for block scalars, but unacceptable for
    # plain scalars.
    if trailing_breaks:
        allow_flow_plain = allow_block_plain = False

    # The combination of (space+ break+) is only acceptable for block
    # scalars.
    if inline_breaks_spaces:
        allow_flow_plain = allow_block_plain = allow_single_quoted = False

    # Mixed spaces and breaks, as well as special character are only
    # allowed for double quoted scalars.
    if mixed_breaks_spaces or special_characters:
        allow_flow_plain = allow_block_plain =  \
        allow_single_quoted = allow_block = False

    # We don't emit multiline plain scalars.
    if line_breaks:
        allow_flow_plain = allow_block_plain = False

    # Flow indicators are forbidden for flow plain scalars.
    if flow_indicators:
        allow_flow_plain = False

    # Block indicators are forbidden for block plain scalars.
    if block_indicators:
        allow_block_plain = False

    return ScalarAnalysis(scalar=scalar,
            empty=False, multiline=line_breaks,
            allow_flow_plain=allow_flow_plain,
            allow_block_plain=allow_block_plain,
            allow_single_quoted=allow_single_quoted,
            allow_double_quoted=allow_double_quoted,
            allow_block=allow_block)
818 # Writers.
def flush_stream(self):
    """Flush the output stream if it offers a `flush` method."""
    if not hasattr(self.stream, 'flush'):
        return
    self.stream.flush()
def write_stream_start(self):
    """Write a byte order mark when the stream encoding is UTF-16.

    Nothing is written for other encodings or when no encoding is set.
    """
    # Write BOM if needed.
    if self.encoding and self.encoding.startswith('utf-16'):
        # The BOM is the single character U+FEFF; encoding it yields the
        # right byte order for the chosen variant ('utf-16-le' gives
        # FF FE, 'utf-16-be' gives FE FF).  Encoding the two characters
        # u'\xFF\xFE' instead would emit four unrelated bytes.
        self.stream.write(u'\uFEFF'.encode(self.encoding))
def write_stream_end(self):
    """Finish the stream; all that remains is flushing the output."""
    self.flush_stream()
def write_indicator(self, indicator, need_whitespace,
        whitespace=False, indention=False):
    """Write `indicator`, preceded by a space when separation is needed.

    A space is inserted only if `need_whitespace` is set and the last
    character written was not whitespace.  `whitespace` becomes the new
    whitespace flag; the indention flag survives only if it was already
    set and `indention` is true.
    """
    data = indicator
    if need_whitespace and not self.whitespace:
        data = u' '+indicator
    self.whitespace = whitespace
    self.indention = self.indention and indention
    self.column += len(data)
    if self.encoding:
        data = data.encode(self.encoding)
    self.stream.write(data)
def write_indent(self):
    """Move the output position to the current indentation column.

    A line break is written first when the current line cannot be
    reused: it already holds non-indention content, or the column is
    past the indent (or at it after a non-whitespace character).
    """
    target = self.indent or 0
    past_target = self.column > target
    at_target_after_text = (self.column == target and not self.whitespace)
    if not self.indention or past_target or at_target_after_text:
        self.write_line_break()
    if self.column < target:
        self.whitespace = True
        padding = u' '*(target-self.column)
        self.column = target
        if self.encoding:
            padding = padding.encode(self.encoding)
        self.stream.write(padding)
def write_line_break(self, data=None):
    """Write a line break (the preferred one by default) and reset column."""
    if data is None:
        data = self.best_line_break
    # A fresh line starts at column 0 and counts as both whitespace and
    # indention.
    self.whitespace = self.indention = True
    self.line = self.line + 1
    self.column = 0
    if self.encoding:
        data = data.encode(self.encoding)
    self.stream.write(data)
def write_version_directive(self, version_text):
    """Write a '%YAML <version>' directive on a line of its own."""
    directive = u'%%YAML %s' % version_text
    if self.encoding:
        directive = directive.encode(self.encoding)
    self.stream.write(directive)
    self.write_line_break()
def write_tag_directive(self, handle_text, prefix_text):
    """Write a '%TAG <handle> <prefix>' directive on a line of its own."""
    directive = u'%%TAG %s %s' % (handle_text, prefix_text)
    if self.encoding:
        directive = directive.encode(self.encoding)
    self.stream.write(directive)
    self.write_line_break()
883 # Scalar streams.
def write_single_quoted(self, text, split=True):
    """Write `text` as a single-quoted scalar.

    Quotes inside the text are doubled.  Runs of line breaks are written
    as real breaks (with one extra break when the run starts with '\\n').
    When `split` is true, a lone inner space may be turned into a line
    fold once the line is wider than `best_width`.
    """
    line_breaks = u'\n\x85\u2028\u2029'

    def put(chunk):
        # Account for the columns used, then encode and write.
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    self.write_indicator(u'\'', True)
    in_spaces = False
    in_breaks = False
    total = len(text)
    # text[start:end] is the pending run that has not been written yet.
    start = end = 0
    while end <= total:
        # ch is None once the scan is one step past the last character.
        if end < total:
            ch = text[end]
        else:
            ch = None
        if in_spaces:
            if ch is None or ch != u' ':
                if start+1 == end and self.column > self.best_width and split   \
                        and start != 0 and end != total:
                    self.write_indent()
                else:
                    put(text[start:end])
                start = end
        elif in_breaks:
            if ch is None or ch not in line_breaks:
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                start = end
        elif ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
            if start < end:
                put(text[start:end])
                start = end
        if ch == u'\'':
            put(u'\'\'')
            start = end + 1
        if ch is not None:
            in_spaces = (ch == u' ')
            in_breaks = (ch in line_breaks)
        end += 1
    self.write_indicator(u'\'', False)
# Characters with a dedicated escape in double-quoted scalars, mapped to
# the letter written after the backslash.
ESCAPE_REPLACEMENTS = {
    u'\0':      u'0',
    u'\x07':    u'a',
    u'\x08':    u'b',
    u'\x09':    u't',
    u'\x0A':    u'n',
    u'\x0B':    u'v',
    u'\x0C':    u'f',
    u'\x0D':    u'r',
    u'\x1B':    u'e',
    u'\"':      u'\"',
    u'\\':      u'\\',
    u'\x85':    u'N',
    u'\xA0':    u'_',
    u'\u2028':  u'L',
    u'\u2029':  u'P',
}
def write_double_quoted(self, text, split=True):
    """Write `text` as a double-quoted scalar.

    Characters that cannot be written literally are escaped, using
    ESCAPE_REPLACEMENTS where possible and \\x, \\u or \\U otherwise;
    non-ASCII printable characters are kept only if `allow_unicode` is
    set.  When `split` is true, over-long lines are continued with a
    trailing backslash.
    """
    def put(chunk):
        # Account for the columns used, then encode and write.
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    self.write_indicator(u'"', True)
    total = len(text)
    # text[start:end] is the pending run that has not been written yet.
    start = end = 0
    while end <= total:
        # ch is None once the scan is one step past the last character.
        ch = None
        if end < total:
            ch = text[end]
        if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
                or not (u'\x20' <= ch <= u'\x7E'
                    or (self.allow_unicode
                        and (u'\xA0' <= ch <= u'\uD7FF'
                            or u'\uE000' <= ch <= u'\uFFFD'))):
            if start < end:
                put(text[start:end])
                start = end
            if ch is not None:
                if ch in self.ESCAPE_REPLACEMENTS:
                    escaped = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                elif ch <= u'\xFF':
                    escaped = u'\\x%02X' % ord(ch)
                elif ch <= u'\uFFFF':
                    escaped = u'\\u%04X' % ord(ch)
                else:
                    escaped = u'\\U%08X' % ord(ch)
                put(escaped)
                start = end+1
        if 0 < end < total-1 and (ch == u' ' or start >= end)    \
                and self.column+(end-start) > self.best_width and split:
            # Break the line here with an escaped line break.
            head = text[start:end]+u'\\'
            if start < end:
                start = end
            put(head)
            self.write_indent()
            self.whitespace = False
            self.indention = False
            if text[start] == u' ':
                # Protect a space that begins the continuation line.
                put(u'\\')
        end += 1
    self.write_indicator(u'"', False)
def determine_chomp(self, text):
    """Return the block scalar chomping indicator for `text`.

    '-' when the text does not end with a line break, '' when it ends
    with exactly one, and '+' when it ends with two or more.
    """
    line_breaks = u'\n\x85\u2028\u2029'
    # Left-pad so that both of the last two positions always exist.
    tail = text[-2:].rjust(2)
    if tail[-1] not in line_breaks:
        return u'-'
    if tail[-2] in line_breaks:
        return u'+'
    return u''
def write_folded(self, text):
    """Write `text` as a folded block scalar ('>').

    Writes the indicator with its chomping hint, then the text.  Runs of
    line breaks are written as real breaks (with one extra break where a
    '\\n' run separates two non-indented lines), and a lone space is
    turned into a line fold once the line is wider than `best_width`.
    """
    chomp = self.determine_chomp(text)
    self.write_indicator(u'>'+chomp, True)
    self.write_indent()
    leading_space = False
    spaces = False
    breaks = False
    # text[start:end] is the pending run that has not been written yet.
    start = end = 0
    while end <= len(text):
        # ch is None once the scan is one step past the last character.
        ch = None
        if end < len(text):
            ch = text[end]
        if breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                if not leading_space and ch is not None and ch != u' '   \
                        and text[start] == u'\n':
                    self.write_line_break()
                leading_space = (ch == u' ')
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                start = end
        elif spaces:
            if ch != u' ':
                if start+1 == end and self.column > self.best_width:
                    self.write_indent()
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029':
                data = text[start:end]
                # Words must advance the column as well; otherwise the
                # width check above only ever sees the spaces and long
                # lines are never folded.
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                if ch is None:
                    self.write_line_break()
                start = end
        if ch is not None:
            breaks = (ch in u'\n\x85\u2028\u2029')
            spaces = (ch == u' ')
        end += 1
def write_literal(self, text):
    """Write `text` as a literal block scalar ('|').

    Writes the indicator with its chomping hint, then each line of the
    text verbatim at the current indentation.
    """
    line_breaks = u'\n\x85\u2028\u2029'
    self.write_indicator(u'|'+self.determine_chomp(text), True)
    self.write_indent()
    in_breaks = False
    total = len(text)
    # text[start:end] is the pending run that has not been written yet.
    start = end = 0
    while end <= total:
        # ch is None once the scan is one step past the last character.
        if end < total:
            ch = text[end]
        else:
            ch = None
        if in_breaks:
            if ch is None or ch not in line_breaks:
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                start = end
        elif ch is None or ch in line_breaks:
            data = text[start:end]
            if self.encoding:
                data = data.encode(self.encoding)
            self.stream.write(data)
            if ch is None:
                self.write_line_break()
            start = end
        if ch is not None:
            in_breaks = (ch in line_breaks)
        end += 1
def write_plain(self, text, split=True):
    """Write `text` as a plain (unquoted) scalar.

    Nothing is written for empty text.  A separating space is written
    first when the last output character was not whitespace.  When
    `split` is true, a lone space may be turned into a line fold once
    the line is wider than `best_width`.
    """
    if not text:
        return
    if not self.whitespace:
        data = u' '
        self.column += len(data)
        if self.encoding:
            data = data.encode(self.encoding)
        self.stream.write(data)
    # Scalar text follows, so the last character is no longer whitespace
    # or indention.  (This used to assign a misspelled `writespace`
    # attribute, leaving the real flag stale.)
    self.whitespace = False
    self.indention = False
    spaces = False
    breaks = False
    # text[start:end] is the pending run that has not been written yet.
    start = end = 0
    while end <= len(text):
        # ch is None once the scan is one step past the last character.
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            if ch != u' ':
                if start+1 == end and self.column > self.best_width and split:
                    self.write_indent()
                    self.whitespace = False
                    self.indention = False
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        elif breaks:
            # NOTE(review): ch is None here only if the text ends with a
            # line break; the style selection appears to rule that out
            # for plain scalars -- confirm before relying on it.
            if ch not in u'\n\x85\u2028\u2029':
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029':
                data = text[start:end]
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end
        if ch is not None:
            spaces = (ch == u' ')
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1