lib/yaml/emitter.py

   1
   2 # Emitter expects events obeying the following grammar:
   3 # stream ::= STREAM-START document* STREAM-END
   4 # document ::= DOCUMENT-START node DOCUMENT-END
   5 # node ::= SCALAR | sequence | mapping
   6 # sequence ::= SEQUENCE-START node* SEQUENCE-END
   7 # mapping ::= MAPPING-START (node node)* MAPPING-END
   8
   9 __all__ = ['Emitter', 'EmitterError']
  10
  11 from error import YAMLError
  12 from events import *
  13
  14 import re
  15
  16 class EmitterError(YAMLError):
  17     pass
  18
  19 class ScalarAnalysis(object):
  20     def __init__(self, scalar, empty, multiline,
  21             allow_flow_plain, allow_block_plain,
  22             allow_single_quoted, allow_double_quoted,
  23             allow_block):
  24         self.scalar = scalar
  25         self.empty = empty
  26         self.multiline = multiline
  27         self.allow_flow_plain = allow_flow_plain
  28         self.allow_block_plain = allow_block_plain
  29         self.allow_single_quoted = allow_single_quoted
  30         self.allow_double_quoted = allow_double_quoted
  31         self.allow_block = allow_block
  32
  33 class Emitter(object):
  34
    # Default mapping from a tag prefix to the handle that abbreviates it.
    # A copy becomes `tag_prefixes` at the start of every document.
    DEFAULT_TAG_PREFIXES = {
        u'!' : u'!',
        u'tag:yaml.org,2002:' : u'!!',
    }
  39
  40     def __init__(self, stream, canonical=None, indent=None, width=None,
  41             allow_unicode=None, line_break=None):
  42
  43         # The stream should have the methods `write` and possibly `flush`.
  44         self.stream = stream
  45
  46         # Encoding can be overriden by STREAM-START.
  47         self.encoding = None
  48
  49         # Emitter is a state machine with a stack of states to handle nested
  50         # structures.
  51         self.states = []
  52         self.state = self.expect_stream_start
  53
  54         # Current event and the event queue.
  55         self.events = []
  56         self.event = None
  57
  58         # The current indentation level and the stack of previous indents.
  59         self.indents = []
  60         self.indent = None
  61
  62         # Flow level.
  63         self.flow_level = 0
  64
  65         # Contexts.
  66         self.root_context = False
  67         self.sequence_context = False
  68         self.mapping_context = False
  69         self.simple_key_context = False
  70
  71         # Characteristics of the last emitted character:
  72         #  - current position.
  73         #  - is it a whitespace?
  74         #  - is it an indention character
  75         #    (indentation space, '-', '?', or ':')?
  76         self.line = 0
  77         self.column = 0
  78         self.whitespace = True
  79         self.indention = True
  80
  81         # Formatting details.
  82         self.canonical = canonical
  83         self.allow_unicode = allow_unicode
  84         self.best_indent = 2
  85         if indent and 1 < indent < 10:
  86             self.best_indent = indent
  87         self.best_width = 80
  88         if width and width > self.best_indent*2:
  89             self.best_width = width
  90         self.best_line_break = u'\n'
  91         if line_break in [u'\r', u'\n', u'\r\n']:
  92             self.best_line_break = line_break
  93
  94         # Tag prefixes.
  95         self.tag_prefixes = None
  96
  97         # Prepared anchor and tag.
  98         self.prepared_anchor = None
  99         self.prepared_tag = None
 100
 101         # Scalar analysis and style.
 102         self.analysis = None
 103         self.style = None
 104
 105     def emit(self, event):
 106         self.events.append(event)
 107         while not self.need_more_events():
 108             self.event = self.events.pop(0)
 109             self.state()
 110             self.event = None
 111
    # In some cases, we wait for a few more events before emitting.
 113
 114     def need_more_events(self):
 115         if not self.events:
 116             return True
 117         event = self.events[0]
 118         if isinstance(event, DocumentStartEvent):
 119             return self.need_events(1)
 120         elif isinstance(event, SequenceStartEvent):
 121             return self.need_events(2)
 122         elif isinstance(event, MappingStartEvent):
 123             return self.need_events(3)
 124         else:
 125             return False
 126
 127     def need_events(self, count):
 128         level = 0
 129         for event in self.events[1:]:
 130             if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
 131                 level += 1
 132             elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
 133                 level -= 1
 134             elif isinstance(event, StreamEndEvent):
 135                 level = -1
 136             if level < 0:
 137                 return False
 138         return (len(self.events) < count+1)
 139
 140     def increase_indent(self, flow=False, indentless=False):
 141         self.indents.append(self.indent)
 142         if self.indent is None:
 143             if flow:
 144                 self.indent = self.best_indent
 145             else:
 146                 self.indent = 0
 147         elif not indentless:
 148             self.indent += self.best_indent
 149
 150     # States.
 151
 152     # Stream handlers.
 153
 154     def expect_stream_start(self):
 155         if isinstance(self.event, StreamStartEvent):
 156             if self.event.encoding:
 157                 self.encoding = self.event.encoding
 158             self.write_stream_start()
 159             self.state = self.expect_first_document_start
 160         else:
 161             raise EmitterError("expected StreamStartEvent, but got %s"
 162                     % self.event)
 163
 164     def expect_nothing(self):
 165         raise EmitterError("expected nothing, but got %s" % self.event)
 166
 167     # Document handlers.
 168
 169     def expect_first_document_start(self):
 170         return self.expect_document_start(first=True)
 171
 172     def expect_document_start(self, first=False):
 173         if isinstance(self.event, DocumentStartEvent):
 174             if self.event.version:
 175                 version_text = self.prepare_version(self.event.version)
 176                 self.write_version_directive(version_text)
 177             self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
 178             if self.event.tags:
 179                 handles = self.event.tags.keys()
 180                 handles.sort()
 181                 for handle in handles:
 182                     prefix = self.event.tags[handle]
 183                     self.tag_prefixes[prefix] = handle
 184                     handle_text = self.prepare_tag_handle(handle)
 185                     prefix_text = self.prepare_tag_prefix(prefix)
 186                     self.write_tag_directive(handle_text, prefix_text)
 187             implicit = (first and not self.event.explicit and not self.canonical
 188                     and not self.event.version and not self.event.tags
 189                     and not self.check_empty_document())
 190             if not implicit:
 191                 self.write_indent()
 192                 self.write_indicator(u'---', True)
 193                 if self.canonical:
 194                     self.write_indent()
 195             self.state = self.expect_document_root
 196         elif isinstance(self.event, StreamEndEvent):
 197             self.write_stream_end()
 198             self.state = self.expect_nothing
 199         else:
 200             raise EmitterError("expected DocumentStartEvent, but got %s"
 201                     % self.event)
 202
 203     def expect_document_end(self):
 204         if isinstance(self.event, DocumentEndEvent):
 205             self.write_indent()
 206             if self.event.explicit:
 207                 self.write_indicator(u'...', True)
 208                 self.write_indent()
 209             self.flush_stream()
 210             self.state = self.expect_document_start
 211         else:
 212             raise EmitterError("expected DocumentEndEvent, but got %s"
 213                     % self.event)
 214
 215     def expect_document_root(self):
 216         self.states.append(self.expect_document_end)
 217         self.expect_node(root=True)
 218
 219     # Node handlers.
 220
 221     def expect_node(self, root=False, sequence=False, mapping=False,
 222             simple_key=False):
 223         self.root_context = root
 224         self.sequence_context = sequence
 225         self.mapping_context = mapping
 226         self.simple_key_context = simple_key
 227         if isinstance(self.event, AliasEvent):
 228             self.expect_alias()
 229         elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
 230             self.process_anchor(u'&')
 231             self.process_tag()
 232             if isinstance(self.event, ScalarEvent):
 233                 self.expect_scalar()
 234             elif isinstance(self.event, SequenceStartEvent):
 235                 if self.flow_level or self.canonical or self.event.flow_style   \
 236                         or self.check_empty_sequence():
 237                     self.expect_flow_sequence()
 238                 else:
 239                     self.expect_block_sequence()
 240             elif isinstance(self.event, MappingStartEvent):
 241                 if self.flow_level or self.canonical or self.event.flow_style   \
 242                         or self.check_empty_mapping():
 243                     self.expect_flow_mapping()
 244                 else:
 245                     self.expect_block_mapping()
 246         else:
 247             raise EmitterError("expected NodeEvent, but got %s" % self.event)
 248
 249     def expect_alias(self):
 250         if self.event.anchor is None:
 251             raise EmitterError("anchor is not specified for alias")
 252         self.process_anchor(u'*')
 253         self.state = self.states.pop()
 254
 255     def expect_scalar(self):
 256         self.increase_indent(flow=True)
 257         self.process_scalar()
 258         self.indent = self.indents.pop()
 259         self.state = self.states.pop()
 260
 261     # Flow sequence handlers.
 262
 263     def expect_flow_sequence(self):
 264         self.write_indicator(u'[', True, whitespace=True)
 265         self.flow_level += 1
 266         self.increase_indent(flow=True)
 267         self.state = self.expect_first_flow_sequence_item
 268
 269     def expect_first_flow_sequence_item(self):
 270         if isinstance(self.event, SequenceEndEvent):
 271             self.indent = self.indents.pop()
 272             self.flow_level -= 1
 273             self.write_indicator(u']', False)
 274             self.state = self.states.pop()
 275         else:
 276             if self.canonical or self.column > self.best_width:
 277                 self.write_indent()
 278             self.states.append(self.expect_flow_sequence_item)
 279             self.expect_node(sequence=True)
 280
 281     def expect_flow_sequence_item(self):
 282         if isinstance(self.event, SequenceEndEvent):
 283             self.indent = self.indents.pop()
 284             self.flow_level -= 1
 285             if self.canonical:
 286                 self.write_indicator(u',', False)
 287                 self.write_indent()
 288             self.write_indicator(u']', False)
 289             self.state = self.states.pop()
 290         else:
 291             self.write_indicator(u',', False)
 292             if self.canonical or self.column > self.best_width:
 293                 self.write_indent()
 294             self.states.append(self.expect_flow_sequence_item)
 295             self.expect_node(sequence=True)
 296
 297     # Flow mapping handlers.
 298
 299     def expect_flow_mapping(self):
 300         self.write_indicator(u'{', True, whitespace=True)
 301         self.flow_level += 1
 302         self.increase_indent(flow=True)
 303         self.state = self.expect_first_flow_mapping_key
 304
 305     def expect_first_flow_mapping_key(self):
 306         if isinstance(self.event, MappingEndEvent):
 307             self.indent = self.indents.pop()
 308             self.flow_level -= 1
 309             self.write_indicator(u'}', False)
 310             self.state = self.states.pop()
 311         else:
 312             if self.canonical or self.column > self.best_width:
 313                 self.write_indent()
 314             if not self.canonical and self.check_simple_key():
 315                 self.states.append(self.expect_flow_mapping_simple_value)
 316                 self.expect_node(mapping=True, simple_key=True)
 317             else:
 318                 self.write_indicator(u'?', True)
 319                 self.states.append(self.expect_flow_mapping_value)
 320                 self.expect_node(mapping=True)
 321
 322     def expect_flow_mapping_key(self):
 323         if isinstance(self.event, MappingEndEvent):
 324             self.indent = self.indents.pop()
 325             self.flow_level -= 1
 326             if self.canonical:
 327                 self.write_indicator(u',', False)
 328                 self.write_indent()
 329             self.write_indicator(u'}', False)
 330             self.state = self.states.pop()
 331         else:
 332             self.write_indicator(u',', False)
 333             if self.canonical or self.column > self.best_width:
 334                 self.write_indent()
 335             if not self.canonical and self.check_simple_key():
 336                 self.states.append(self.expect_flow_mapping_simple_value)
 337                 self.expect_node(mapping=True, simple_key=True)
 338             else:
 339                 self.write_indicator(u'?', True)
 340                 self.states.append(self.expect_flow_mapping_value)
 341                 self.expect_node(mapping=True)
 342
 343     def expect_flow_mapping_simple_value(self):
 344         self.write_indicator(u':', False)
 345         self.states.append(self.expect_flow_mapping_key)
 346         self.expect_node(mapping=True)
 347
 348     def expect_flow_mapping_value(self):
 349         if self.canonical or self.column > self.best_width:
 350             self.write_indent()
 351         self.write_indicator(u':', True)
 352         self.states.append(self.expect_flow_mapping_key)
 353         self.expect_node(mapping=True)
 354
 355     # Block sequence handlers.
 356
 357     def expect_block_sequence(self):
 358         indentless = (self.mapping_context and not self.indention)
 359         self.increase_indent(flow=False, indentless=indentless)
 360         self.state = self.expect_first_block_sequence_item
 361
 362     def expect_first_block_sequence_item(self):
 363         return self.expect_block_sequence_item(first=True)
 364
 365     def expect_block_sequence_item(self, first=False):
 366         if not first and isinstance(self.event, SequenceEndEvent):
 367             self.indent = self.indents.pop()
 368             self.state = self.states.pop()
 369         else:
 370             self.write_indent()
 371             self.write_indicator(u'-', True, indention=True)
 372             self.states.append(self.expect_block_sequence_item)
 373             self.expect_node(sequence=True)
 374
 375     # Block mapping handlers.
 376
 377     def expect_block_mapping(self):
 378         self.increase_indent(flow=False)
 379         self.state = self.expect_first_block_mapping_key
 380
 381     def expect_first_block_mapping_key(self):
 382         return self.expect_block_mapping_key(first=True)
 383
 384     def expect_block_mapping_key(self, first=False):
 385         if not first and isinstance(self.event, MappingEndEvent):
 386             self.indent = self.indents.pop()
 387             self.state = self.states.pop()
 388         else:
 389             self.write_indent()
 390             if self.check_simple_key():
 391                 self.states.append(self.expect_block_mapping_simple_value)
 392                 self.expect_node(mapping=True, simple_key=True)
 393             else:
 394                 self.write_indicator(u'?', True, indention=True)
 395                 self.states.append(self.expect_block_mapping_value)
 396                 self.expect_node(mapping=True)
 397
 398     def expect_block_mapping_simple_value(self):
 399         self.write_indicator(u':', False)
 400         self.states.append(self.expect_block_mapping_key)
 401         self.expect_node(mapping=True)
 402
 403     def expect_block_mapping_value(self):
 404         self.write_indent()
 405         self.write_indicator(u':', True, indention=True)
 406         self.states.append(self.expect_block_mapping_key)
 407         self.expect_node(mapping=True)
 408
 409     # Checkers.
 410
 411     def check_empty_sequence(self):
 412         return (isinstance(self.event, SequenceStartEvent) and self.events
 413                 and isinstance(self.events[0], SequenceEndEvent))
 414
 415     def check_empty_mapping(self):
 416         return (isinstance(self.event, MappingStartEvent) and self.events
 417                 and isinstance(self.events[0], MappingEndEvent))
 418
 419     def check_empty_document(self):
 420         if not isinstance(self.event, DocumentStartEvent) or not self.events:
 421             return False
 422         event = self.events[0]
 423         return (isinstance(event, ScalarEvent) and event.anchor is None
 424                 and event.tag is None and event.implicit and event.value == u'')
 425
 426     def check_simple_key(self):
 427         length = 0
 428         if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
 429             if self.prepared_anchor is None:
 430                 self.prepared_anchor = self.prepare_anchor(self.event.anchor)
 431             length += len(self.prepared_anchor)
 432         if isinstance(self.event, (ScalarEvent, CollectionStartEvent))  \
 433                 and self.event.tag is not None:
 434             if self.prepared_tag is None:
 435                 self.prepared_tag = self.prepare_tag(self.event.tag)
 436             length += len(self.prepared_tag)
 437         if isinstance(self.event, ScalarEvent):
 438             if self.analysis is None:
 439                 self.analysis = self.analyze_scalar(self.event.value)
 440             length += len(self.analysis.scalar)
 441         return (length < 128 and (isinstance(self.event, AliasEvent)
 442             or (isinstance(self.event, ScalarEvent)
 443                     and not self.analysis.empty and not self.analysis.multiline)
 444             or self.check_empty_sequence() or self.check_empty_mapping()))
 445
 446     # Anchor, Tag, and Scalar processors.
 447
 448     def process_anchor(self, indicator):
 449         if self.event.anchor is None:
 450             self.prepared_anchor = None
 451             return
 452         if self.prepared_anchor is None:
 453             self.prepared_anchor = self.prepare_anchor(self.event.anchor)
 454         if self.prepared_anchor:
 455             self.write_indicator(indicator+self.prepared_anchor, True)
 456         self.prepared_anchor = None
 457
 458     def process_tag(self):
 459         tag = self.event.tag
 460         if isinstance(self.event, ScalarEvent):
 461             if self.style is None:
 462                 self.style = self.choose_scalar_style()
 463             if ((not self.canonical or tag is None) and
 464                 ((self.style == '' and self.event.implicit[0])
 465                         or (self.style != '' and self.event.implicit[1]))):
 466                 self.prepared_tag = None
 467                 return
 468             if self.event.implicit[0] and tag is None:
 469                 tag = u'!'
 470                 self.prepared_tag = None
 471         else:
 472             if (not self.canonical or tag is None) and self.event.implicit:
 473                 self.prepared_tag = None
 474                 return
 475         if tag is None:
 476             raise EmitterError("tag is not specified")
 477         if self.prepared_tag is None:
 478             self.prepared_tag = self.prepare_tag(tag)
 479         if self.prepared_tag:
 480             self.write_indicator(self.prepared_tag, True)
 481         self.prepared_tag = None
 482
 483     def choose_scalar_style(self):
 484         if self.analysis is None:
 485             self.analysis = self.analyze_scalar(self.event.value)
 486         if self.event.style == '"' or self.canonical:
 487             return '"'
 488         if not self.event.style and self.event.implicit[0]:
 489             if (not (self.simple_key_context and
 490                     (self.analysis.empty or self.analysis.multiline))
 491                 and (self.flow_level and self.analysis.allow_flow_plain
 492                     or (not self.flow_level and self.analysis.allow_block_plain))):
 493                 return ''
 494         if self.event.style and self.event.style in '|>':
 495             if (not self.flow_level and not self.simple_key_context
 496                     and self.analysis.allow_block):
 497                 return self.event.style
 498         if not self.event.style or self.event.style == '\'':
 499             if (self.analysis.allow_single_quoted and
 500                     not (self.simple_key_context and self.analysis.multiline)):
 501                 return '\''
 502         return '"'
 503
 504     def process_scalar(self):
 505         if self.analysis is None:
 506             self.analysis = self.analyze_scalar(self.event.value)
 507         if self.style is None:
 508             self.style = self.choose_scalar_style()
 509         split = (not self.simple_key_context)
 510         #if self.analysis.multiline and split    \
 511         #        and (not self.style or self.style in '\'\"'):
 512         #    self.write_indent()
 513         if self.style == '"':
 514             self.write_double_quoted(self.analysis.scalar, split)
 515         elif self.style == '\'':
 516             self.write_single_quoted(self.analysis.scalar, split)
 517         elif self.style == '>':
 518             self.write_folded(self.analysis.scalar)
 519         elif self.style == '|':
 520             self.write_literal(self.analysis.scalar)
 521         else:
 522             self.write_plain(self.analysis.scalar, split)
 523         self.analysis = None
 524         self.style = None
 525
 526     # Analyzers.
 527
 528     def prepare_version(self, version):
 529         major, minor = version
 530         if major != 1:
 531             raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
 532         return u'%d.%d' % (major, minor)
 533
 534     def prepare_tag_handle(self, handle):
 535         if not handle:
 536             raise EmitterError("tag handle must not be empty")
 537         if handle[0] != u'!' or handle[-1] != u'!':
 538             raise EmitterError("tag handle must start and end with '!': %r"
 539                     % (handle.encode('utf-8')))
 540         for ch in handle[1:-1]:
 541             if not (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'    \
 542                     or ch in u'-_'):
 543                 raise EmitterError("invalid character %r in the tag handle: %r"
 544                         % (ch.encode('utf-8'), handle.encode('utf-8')))
 545         return handle
 546
 547     def prepare_tag_prefix(self, prefix):
 548         if not prefix:
 549             raise EmitterError("tag prefix must not be empty")
 550         chunks = []
 551         start = end = 0
 552         if prefix[0] == u'!':
 553             end = 1
 554         while end < len(prefix):
 555             ch = prefix[end]
 556             if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
 557                     or ch in u'-;/?!:@&=+$,_.~*\'()[]':
 558                 end += 1
 559             else:
 560                 if start < end:
 561                     chunks.append(prefix[start:end])
 562                 start = end = end+1
 563                 data = ch.encode('utf-8')
 564                 for ch in data:
 565                     chunks.append(u'%%%02X' % ord(ch))
 566         if start < end:
 567             chunks.append(prefix[start:end])
 568         return u''.join(chunks)
 569
 570     def prepare_tag(self, tag):
 571         if not tag:
 572             raise EmitterError("tag must not be empty")
 573         if tag == u'!':
 574             return tag
 575         handle = None
 576         suffix = tag
 577         for prefix in self.tag_prefixes:
 578             if tag.startswith(prefix)   \
 579                     and (prefix == u'!' or len(prefix) < len(tag)):
 580                 handle = self.tag_prefixes[prefix]
 581                 suffix = tag[len(prefix):]
 582         chunks = []
 583         start = end = 0
 584         while end < len(suffix):
 585             ch = suffix[end]
 586             if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
 587                     or ch in u'-;/?:@&=+$,_.~*\'()[]'   \
 588                     or (ch == u'!' and handle != u'!'):
 589                 end += 1
 590             else:
 591                 if start < end:
 592                     chunks.append(suffix[start:end])
 593                 start = end = end+1
 594                 data = ch.encode('utf-8')
 595                 for ch in data:
 596                     chunks.append(u'%%%02X' % ord(ch))
 597         if start < end:
 598             chunks.append(suffix[start:end])
 599         suffix_text = u''.join(chunks)
 600         if handle:
 601             return u'%s%s' % (handle, suffix_text)
 602         else:
 603             return u'!<%s>' % suffix_text
 604
 605     def prepare_anchor(self, anchor):
 606         if not anchor:
 607             raise EmitterError("anchor must not be empty")
 608         for ch in anchor:
 609             if not (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'    \
 610                     or ch in u'-_'):
 611                 raise EmitterError("invalid character %r in the anchor: %r"
 612                         % (ch.encode('utf-8'), anchor.encode('utf-8')))
 613         return anchor
 614
 615     def analyze_scalar(self, scalar):
 616
 617         # Empty scalar is a special case.
 618         if not scalar:
 619             return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
 620                     allow_flow_plain=False, allow_block_plain=True,
 621                     allow_single_quoted=True, allow_double_quoted=True,
 622                     allow_block=False)
 623
 624         # Indicators and special characters.
 625         block_indicators = False
 626         flow_indicators = False
 627         line_breaks = False
 628         special_characters = False
 629
 630         # Whitespaces.
 631         inline_spaces = False          # non-space space+ non-space
 632         inline_breaks = False          # non-space break+ non-space
 633         leading_spaces = False         # ^ space+ (non-space | $)
 634         leading_breaks = False         # ^ break+ (non-space | $)
 635         trailing_spaces = False        # (^ | non-space) space+ $
 636         trailing_breaks = False        # (^ | non-space) break+ $
 637         inline_breaks_spaces = False   # non-space break+ space+ non-space
 638         mixed_breaks_spaces = False    # anything else
 639
 640         # Check document indicators.
 641         if scalar.startswith(u'---') or scalar.startswith(u'...'):
 642             block_indicators = True
 643             flow_indicators = True
 644
 645         # First character or preceded by a whitespace.
 646         preceeded_by_space = True
 647
 648         # Last character or followed by a whitespace.
 649         followed_by_space = (len(scalar) == 1 or
 650                 scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')
 651
 652         # The current series of whitespaces contain plain spaces.
 653         spaces = False
 654
 655         # The current series of whitespaces contain line breaks.
 656         breaks = False
 657
 658         # The current series of whitespaces contain a space followed by a
 659         # break.
 660         mixed = False
 661
 662         # The current series of whitespaces start at the beginning of the
 663         # scalar.
 664         leading = False
 665
 666         index = 0
 667         while index < len(scalar):
 668             ch = scalar[index]
 669
 670             # Check for indicators.
 671
 672             if index == 0:
 673                 # Leading indicators are special characters.
 674                 if ch in u'#,[]{}&*!|>\'\"%@`':
 675                     flow_indicators = True
 676                     block_indicators = True
 677                 if ch in u'?:':
 678                     flow_indicators = True
 679                     if followed_by_space:
 680                         block_indicators = True
 681                 if ch == u'-' and followed_by_space:
 682                     flow_indicators = True
 683                     block_indicators = True
 684             else:
 685                 # Some indicators cannot appear within a scalar as well.
 686                 if ch in u',?[]{}':
 687                     flow_indicators = True
 688                 if ch == u':':
 689                     flow_indicators = True
 690                     if followed_by_space:
 691                         block_indicators = True
 692                 if ch == u'#' and preceeded_by_space:
 693                     flow_indicators = True
 694                     block_indicators = True
 695
 696             # Check for line breaks, special, and unicode characters.
 697
 698             if ch in u'\n\x85\u2028\u2029':
 699                 line_breaks = True
 700             if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
 701                 if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
 702                         or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
 703                     unicode_characters = True
 704                     if not self.allow_unicode:
 705                         special_characters = True
 706                 else:
 707                     special_characters = True
 708
 709             # Spaces, line breaks, and how they are mixed. State machine.
 710
 711             # Start or continue series of whitespaces.
 712             if ch in u' \n\x85\u2028\u2029':
 713                 if spaces and breaks:
 714                     if ch != u' ':      # break+ (space+ break+)    => mixed
 715                         mixed = True
 716                 elif spaces:
 717                     if ch != u' ':      # (space+ break+)   => mixed
 718                         breaks = True
 719                         mixed = True
 720                 elif breaks:
 721                     if ch == u' ':      # break+ space+
 722                         spaces = True
 723                 else:
 724                     leading = (index == 0)
 725                     if ch == u' ':      # space+
 726                         spaces = True
 727                     else:               # break+
 728                         breaks = True
 729
 730             # Series of whitespaces ended with a non-space.
 731             elif spaces or breaks:
 732                 if leading:
 733                     if spaces and breaks:
 734                         mixed_breaks_spaces = True
 735                     elif spaces:
 736                         leading_spaces = True
 737                     elif breaks:
 738                         leading_breaks = True
 739                 else:
 740                     if mixed:
 741                         mixed_breaks_spaces = True
 742                     elif spaces and breaks:
 743                         inline_breaks_spaces = True
 744                     elif spaces:
 745                         inline_spaces = True
 746                     elif breaks:
 747                         inline_breaks = True
 748                 spaces = breaks = mixed = leading = False
 749
 750             # Series of whitespaces reach the end.
 751             if (spaces or breaks) and (index == len(scalar)-1):
 752                 if spaces and breaks:
 753                     mixed_breaks_spaces = True
 754                 elif spaces:
 755                     trailing_spaces = True
 756                     if leading:
 757                         leading_spaces = True
 758                 elif breaks:
 759                     trailing_breaks = True
 760                     if leading:
 761                         leading_breaks = True
 762                 spaces = breaks = mixed = leading = False
 763
 764             # Prepare for the next character.
 765             index += 1
 766             preceeded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
 767             followed_by_space = (index+1 >= len(scalar) or
 768                     scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')
 769
 770         # Let's decide what styles are allowed.
 771         allow_flow_plain = True
 772         allow_block_plain = True
 773         allow_single_quoted = True
 774         allow_double_quoted = True
 775         allow_block = True
 776
 777         # Leading and trailing whitespace are bad for plain scalars. We also
 778         # do not want to mess with leading whitespaces for block scalars.
 779         if leading_spaces or leading_breaks or trailing_spaces:
 780             allow_flow_plain = allow_block_plain = allow_block = False
 781
 782         # Trailing breaks are fine for block scalars, but unacceptable for
 783         # plain scalars.
 784         if trailing_breaks:
 785             allow_flow_plain = allow_block_plain = False
 786
 787         # The combination of (space+ break+) is only acceptable for block
 788         # scalars.
 789         if inline_breaks_spaces:
 790             allow_flow_plain = allow_block_plain = allow_single_quoted = False
 791
 792         # Mixed spaces and breaks, as well as special character are only
 793         # allowed for double quoted scalars.
 794         if mixed_breaks_spaces or special_characters:
 795             allow_flow_plain = allow_block_plain =  \
 796             allow_single_quoted = allow_block = False
 797
 798         # We don't emit multiline plain scalars.
 799         if line_breaks:
 800             allow_flow_plain = allow_block_plain = False
 801
 802         # Flow indicators are forbidden for flow plain scalars.
 803         if flow_indicators:
 804             allow_flow_plain = False
 805
 806         # Block indicators are forbidden for block plain scalars.
 807         if block_indicators:
 808             allow_block_plain = False
 809
 810         return ScalarAnalysis(scalar=scalar,
 811                 empty=False, multiline=line_breaks,
 812                 allow_flow_plain=allow_flow_plain,
 813                 allow_block_plain=allow_block_plain,
 814                 allow_single_quoted=allow_single_quoted,
 815                 allow_double_quoted=allow_double_quoted,
 816                 allow_block=allow_block)
 817
 818     # Writers.
 819
 820     def flush_stream(self):
 821         if hasattr(self.stream, 'flush'):
 822             self.stream.flush()
 823
 824     def write_stream_start(self):
 825         # Write BOM if needed.
 826         if self.encoding and self.encoding.startswith('utf-16'):
 827             self.stream.write(u'\xFF\xFE'.encode(self.encoding))
 828
    def write_stream_end(self):
        """Finish the stream by flushing the output via `flush_stream`."""
        self.flush_stream()
 831
 832     def write_indicator(self, indicator, need_whitespace,
 833             whitespace=False, indention=False):
 834         if self.whitespace or not need_whitespace:
 835             data = indicator
 836         else:
 837             data = u' '+indicator
 838         self.whitespace = whitespace
 839         self.indention = self.indention and indention
 840         self.column += len(data)
 841         if self.encoding:
 842             data = data.encode(self.encoding)
 843         self.stream.write(data)
 844
 845     def write_indent(self):
 846         indent = self.indent or 0
 847         if not self.indention or self.column > indent   \
 848                 or (self.column == indent and not self.whitespace):
 849             self.write_line_break()
 850         if self.column < indent:
 851             self.whitespace = True
 852             data = u' '*(indent-self.column)
 853             self.column = indent
 854             if self.encoding:
 855                 data = data.encode(self.encoding)
 856             self.stream.write(data)
 857
 858     def write_line_break(self, data=None):
 859         if data is None:
 860             data = self.best_line_break
 861         self.whitespace = True
 862         self.indention = True
 863         self.line += 1
 864         self.column = 0
 865         if self.encoding:
 866             data = data.encode(self.encoding)
 867         self.stream.write(data)
 868
 869     def write_version_directive(self, version_text):
 870         data = u'%%YAML %s' % version_text
 871         if self.encoding:
 872             data = data.encode(self.encoding)
 873         self.stream.write(data)
 874         self.write_line_break()
 875
 876     def write_tag_directive(self, handle_text, prefix_text):
 877         data = u'%%TAG %s %s' % (handle_text, prefix_text)
 878         if self.encoding:
 879             data = data.encode(self.encoding)
 880         self.stream.write(data)
 881         self.write_line_break()
 882
 883     # Scalar streams.
 884
    def write_single_quoted(self, text, split=True):
        """Write `text` as a single-quoted scalar.

        text  -- the scalar content.
        split -- when true, a lone space that falls beyond
                 `self.best_width` may be replaced by a line break plus
                 indentation.

        The text is scanned once.  `start` marks the beginning of the
        pending (not yet written) chunk and `end` is the scan position;
        `spaces` and `breaks` record whether the previous character was a
        space or a line break.  The loop runs one step past the end of the
        text (with `ch` set to None) so the final chunk is flushed.
        """
        self.write_indicator(u'\'', True)
        spaces = False
        breaks = False
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if spaces:
                # A run of spaces has just ended.
                if ch is None or ch != u' ':
                    # Only a single space that is neither the first nor the
                    # last character of the text may be turned into a fold.
                    if start+1 == end and self.column > self.best_width and split   \
                            and start != 0 and end != len(text):
                        self.write_indent()
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                    start = end
            elif breaks:
                # A run of line breaks has just ended.
                if ch is None or ch not in u'\n\x85\u2028\u2029':
                    # A run starting with '\n' gets one extra line break
                    # (presumably because a single break inside a quoted
                    # scalar is folded away on reading -- confirm against
                    # the YAML specification).
                    if text[start] == u'\n':
                        self.write_line_break()
                    for br in text[start:end]:
                        # '\n' is written as the preferred line break; the
                        # other break characters are written verbatim.
                        if br == u'\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    self.write_indent()
                    start = end
            else:
                # Ordinary characters: flush the pending chunk when a
                # space, a break, a quote or the end of the text is reached.
                if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
                    if start < end:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                        start = end
            if ch == u'\'':
                # A single quote inside the scalar is written doubled, and
                # the pending chunk restarts after it.
                data = u'\'\''
                self.column += 2
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end + 1
            if ch is not None:
                spaces = (ch == u' ')
                breaks = (ch in u'\n\x85\u2028\u2029')
            end += 1
        self.write_indicator(u'\'', False)
 938
    # Maps a character to the character that follows the backslash in its
    # short escape form.  write_double_quoted consults this table first and
    # falls back to numeric escapes for characters that are not listed.
    ESCAPE_REPLACEMENTS = {
        u'\0':      u'0',
        u'\x07':    u'a',
        u'\x08':    u'b',
        u'\x09':    u't',
        u'\x0A':    u'n',
        u'\x0B':    u'v',
        u'\x0C':    u'f',
        u'\x0D':    u'r',
        u'\x1B':    u'e',
        u'\"':      u'\"',
        u'\\':      u'\\',
        u'\x85':    u'N',
        u'\xA0':    u'_',
        u'\u2028':  u'L',
        u'\u2029':  u'P',
    }
 956
    def write_double_quoted(self, text, split=True):
        """Write `text` as a double-quoted scalar, escaping where needed.

        text  -- the scalar content.
        split -- when true, the output may be folded onto a new line once
                 it would run beyond `self.best_width`.

        `start` marks the beginning of the pending (not yet written) chunk
        and `end` is the scan position.  The loop runs one step past the
        end of the text (with `ch` set to None) so the final chunk is
        flushed.
        """
        self.write_indicator(u'"', True)
        start = end = 0
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            # A character needs escaping when it is a double quote, a
            # backslash, one of the listed break/BOM characters, or lies
            # outside printable ASCII -- unless allow_unicode is set and it
            # falls in one of the two accepted non-ASCII ranges.
            if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
                    or not (u'\x20' <= ch <= u'\x7E'
                        or (self.allow_unicode
                            and (u'\xA0' <= ch <= u'\uD7FF'
                                or u'\uE000' <= ch <= u'\uFFFD'))):
                # Flush the pending run of characters that need no escaping.
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end
                if ch is not None:
                    # Prefer the short escape from ESCAPE_REPLACEMENTS;
                    # otherwise use a 2-, 4- or 8-digit hexadecimal escape
                    # depending on the code point.
                    if ch in self.ESCAPE_REPLACEMENTS:
                        data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                    elif ch <= u'\xFF':
                        data = u'\\x%02X' % ord(ch)
                    elif ch <= u'\uFFFF':
                        data = u'\\u%04X' % ord(ch)
                    else:
                        data = u'\\U%08X' % ord(ch)
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end+1
            # Fold the line: only strictly inside the text, at a space or
            # directly after an escape (nothing pending), and only when the
            # pending chunk would run past best_width.
            if 0 < end < len(text)-1 and (ch == u' ' or start >= end)   \
                    and self.column+(end-start) > self.best_width and split:
                # The pending chunk is written followed by a backslash that
                # ends the line.
                data = text[start:end]+u'\\'
                if start < end:
                    start = end
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                # When the continuation begins with a space, a backslash is
                # written in front of it (presumably so the reader does not
                # strip it as indentation -- confirm against the YAML
                # specification).
                if text[start] == u' ':
                    data = u'\\'
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
            end += 1
        self.write_indicator(u'"', False)
1010
1011     def determine_chomp(self, text):
1012         tail = text[-2:]
1013         while len(tail) < 2:
1014             tail = u' '+tail
1015         if tail[-1] in u'\n\x85\u2028\u2029':
1016             if tail[-2] in u'\n\x85\u2028\u2029':
1017                 return u'+'
1018             else:
1019                 return u''
1020         else:
1021             return u'-'
1022
1023     def write_folded(self, text):
1024         chomp = self.determine_chomp(text)
1025         self.write_indicator(u'>'+chomp, True)
1026         self.write_indent()
1027         leading_space = False
1028         spaces = False
1029         breaks = False
1030         start = end = 0
1031         while end <= len(text):
1032             ch = None
1033             if end < len(text):
1034                 ch = text[end]
1035             if breaks:
1036                 if ch is None or ch not in u'\n\x85\u2028\u2029':
1037                     if not leading_space and ch is not None and ch != u' '  \
1038                             and text[start] == u'\n':
1039                         self.write_line_break()
1040                     leading_space = (ch == u' ')
1041                     for br in text[start:end]:
1042                         if br == u'\n':
1043                             self.write_line_break()
1044                         else:
1045                             self.write_line_break(br)
1046                     if ch is not None:
1047                         self.write_indent()
1048                     start = end
1049             elif spaces:
1050                 if ch != u' ':
1051                     if start+1 == end and self.column > self.best_width:
1052                         self.write_indent()
1053                     else:
1054                         data = text[start:end]
1055                         self.column += len(data)
1056                         if self.encoding:
1057                             data = data.encode(self.encoding)
1058                         self.stream.write(data)
1059                     start = end
1060             else:
1061                 if ch is None or ch in u' \n\x85\u2028\u2029':
1062                     data = text[start:end]
1063                     if self.encoding:
1064                         data = data.encode(self.encoding)
1065                     self.stream.write(data)
1066                     if ch is None:
1067                         self.write_line_break()
1068                     start = end
1069             if ch is not None:
1070                 breaks = (ch in u'\n\x85\u2028\u2029')
1071                 spaces = (ch == u' ')
1072             end += 1
1073
1074     def write_literal(self, text):
1075         chomp = self.determine_chomp(text)
1076         self.write_indicator(u'|'+chomp, True)
1077         self.write_indent()
1078         breaks = False
1079         start = end = 0
1080         while end <= len(text):
1081             ch = None
1082             if end < len(text):
1083                 ch = text[end]
1084             if breaks:
1085                 if ch is None or ch not in u'\n\x85\u2028\u2029':
1086                     for br in text[start:end]:
1087                         if br == u'\n':
1088                             self.write_line_break()
1089                         else:
1090                             self.write_line_break(br)
1091                     if ch is not None:
1092                         self.write_indent()
1093                     start = end
1094             else:
1095                 if ch is None or ch in u'\n\x85\u2028\u2029':
1096                     data = text[start:end]
1097                     if self.encoding:
1098                         data = data.encode(self.encoding)
1099                     self.stream.write(data)
1100                     if ch is None:
1101                         self.write_line_break()
1102                     start = end
1103             if ch is not None:
1104                 breaks = (ch in u'\n\x85\u2028\u2029')
1105             end += 1
1106
1107     def write_plain(self, text, split=True):
1108         if not text:
1109             return
1110         if not self.whitespace:
1111             data = u' '
1112             self.column += len(data)
1113             if self.encoding:
1114                 data = data.encode(self.encoding)
1115             self.stream.write(data)
1116         self.writespace = False
1117         self.indention = False
1118         spaces = False
1119         breaks = False
1120         start = end = 0
1121         while end <= len(text):
1122             ch = None
1123             if end < len(text):
1124                 ch = text[end]
1125             if spaces:
1126                 if ch != u' ':
1127                     if start+1 == end and self.column > self.best_width and split:
1128                         self.write_indent()
1129                         self.writespace = False
1130                         self.indention = False
1131                     else:
1132                         data = text[start:end]
1133                         self.column += len(data)
1134                         if self.encoding:
1135                             data = data.encode(self.encoding)
1136                         self.stream.write(data)
1137                     start = end
1138             elif breaks:
1139                 if ch not in u'\n\x85\u2028\u2029':
1140                     if text[start] == u'\n':
1141                         self.write_line_break()
1142                     for br in text[start:end]:
1143                         if br == u'\n':
1144                             self.write_line_break()
1145                         else:
1146                             self.write_line_break(br)
1147                     self.write_indent()
1148                     self.whitespace = False
1149                     self.indention = False
1150                     start = end
1151             else:
1152                 if ch is None or ch in u' \n\x85\u2028\u2029':
1153                     data = text[start:end]
1154                     self.column += len(data)
1155                     if self.encoding:
1156                         data = data.encode(self.encoding)
1157                     self.stream.write(data)
1158                     start = end
1159             if ch is not None:
1160                 spaces = (ch == u' ')
1161                 breaks = (ch in u'\n\x85\u2028\u2029')
1162             end += 1
1163