1 #include "emitterutils.h"
3 #include "indentation.h"
4 #include "yaml-cpp/binary.h"
5 #include "yaml-cpp/exceptions.h"
6 #include "stringsource.h"
// U+FFFD: the code point this file substitutes whenever a byte sequence
// cannot be decoded or a code point is out of range.
enum { REPLACEMENT_CHARACTER = 0xFFFD };
// Tests a code point against the YAML ns-anchor-char production (per the
// note on the signature line). Only the conditions are visible in this
// extract: the statements they guard are not shown, so which result each
// condition produces cannot be confirmed from here.
//
// ch: the code point to classify.
17 bool IsAnchorChar(int ch
) { // test for ns-anchor-char
// Explicitly enumerated characters, grouped by the YAML production named in
// each trailing comment.
19 case ',': case '[': case ']': case '{': case '}': // c-flow-indicator
20 case ' ': case '\t': // s-white
21 case 0xFEFF: // c-byte-order-mark
22 case 0xA: case 0xD: // b-char
// 0xD800-0xDFFF is the UTF-16 surrogate range.
36 if (ch
>= 0xD800 && ch
<= 0xDFFF)
// Matches any value whose low 16 bits are 0xFFFE or 0xFFFF.
38 if ((ch
& 0xFFFE) == 0xFFFE)
// 0xFDD0-0xFDEF: a contiguous block of Unicode noncharacters.
40 if ((ch
>= 0xFDD0) && (ch
<= 0xFDEF))
// Examines a single byte of UTF-8 text and switches on its upper four bits.
// The name indicates that the result is the byte count of the sequence this
// byte introduces; the value produced for each case is not visible in this
// extract.
//
// ch: the byte to inspect.
48 int Utf8BytesIndicated(char ch
) {
// Convert through unsigned char so that bytes >= 0x80 do not become negative
// (where char is signed) before the shift below.
49 int byteVal
= static_cast<unsigned char>(ch
);
50 switch (byteVal
>> 4) {
// High nibble 0-7: the top bit is clear, i.e. byte values 0x00-0x7F.
51 case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
// Reports whether `ch` has the bit pattern 10xxxxxx, i.e. its two most
// significant bits are exactly "10".
bool IsTrailingByte(char ch) {
  const int topTwoBits = ch & 0xC0;
  return topTwoBits == 0x80;
}
// Decodes one code point from the UTF-8 text in [first, last) into
// `codePoint`. `first` is taken by reference and is advanced by the loop
// below as trailing bytes are consumed. A truncated or malformed sequence,
// or an illegal decoded value, yields REPLACEMENT_CHARACTER in `codePoint`.
//
// The return statements are not visible in this extract; callers in this
// file use the result as the condition of their `for` loops.
// NOTE(review): `*first` is dereferenced below; a preceding `first == last`
// guard is not visible here -- confirm one exists.
68 bool GetNextCodePointAndAdvance(int& codePoint
, std::string::const_iterator
& first
, std::string::const_iterator last
) {
// Sequence length as indicated by the lead byte.
72 int nBytes
= Utf8BytesIndicated(*first
);
76 codePoint
= REPLACEMENT_CHARACTER
;
85 // Gather bits from trailing bytes
// Start from the lead byte: the mask keeps only its low (7 - nBytes) bits.
86 codePoint
= static_cast<unsigned char>(*first
) & ~(0xFF << (7 - nBytes
));
89 for (; nBytes
> 0; ++first
, --nBytes
) {
// Input ended early, or this byte is not a trailing byte.
90 if ((first
== last
) || !IsTrailingByte(*first
)) {
91 codePoint
= REPLACEMENT_CHARACTER
;
// Merge in the low six bits of this trailing byte.
95 codePoint
|= *first
& 0x3F;
98 // Check for illegal code points
// Above the last Unicode code point.
99 if (codePoint
> 0x10FFFF)
100 codePoint
= REPLACEMENT_CHARACTER
;
// UTF-16 surrogate range.
101 else if (codePoint
>= 0xD800 && codePoint
<= 0xDFFF)
102 codePoint
= REPLACEMENT_CHARACTER
;
// Low 16 bits are 0xFFFE or 0xFFFF.
103 else if ((codePoint
& 0xFFFE) == 0xFFFE)
104 codePoint
= REPLACEMENT_CHARACTER
;
// Noncharacter block 0xFDD0-0xFDEF.
105 else if (codePoint
>= 0xFDD0 && codePoint
<= 0xFDEF)
106 codePoint
= REPLACEMENT_CHARACTER
;
110 void WriteCodePoint(ostream
& out
, int codePoint
) {
111 if (codePoint
< 0 || codePoint
> 0x10FFFF) {
112 codePoint
= REPLACEMENT_CHARACTER
;
114 if (codePoint
< 0x7F) {
115 out
<< static_cast<char>(codePoint
);
116 } else if (codePoint
< 0x7FF) {
117 out
<< static_cast<char>(0xC0 | (codePoint
>> 6))
118 << static_cast<char>(0x80 | (codePoint
& 0x3F));
119 } else if (codePoint
< 0xFFFF) {
120 out
<< static_cast<char>(0xE0 | (codePoint
>> 12))
121 << static_cast<char>(0x80 | ((codePoint
>> 6) & 0x3F))
122 << static_cast<char>(0x80 | (codePoint
& 0x3F));
124 out
<< static_cast<char>(0xF0 | (codePoint
>> 18))
125 << static_cast<char>(0x80 | ((codePoint
>> 12) & 0x3F))
126 << static_cast<char>(0x80 | ((codePoint
>> 6) & 0x3F))
127 << static_cast<char>(0x80 | (codePoint
& 0x3F));
// Checks whether `str` is acceptable as a plain (unquoted) scalar.
//
// str:            candidate scalar text.
// inFlow:         selects the in-flow variants of the start and end
//                 expressions.
// allowOnlyAscii: when set, a byte above 0x7F is also tested for.
//
// The statements guarded by each check are not visible in this extract.
// NOTE(review): only `buffer[0]` is tested, so `buffer` is presumably
// advanced inside a loop that is not shown here -- confirm.
131 bool IsValidPlainScalar(const std::string
& str
, bool inFlow
, bool allowOnlyAscii
) {
135 // first check the start
136 const RegEx
& start
= (inFlow
? Exp::PlainScalarInFlow() : Exp::PlainScalar());
137 if(!start
.Matches(str
))
140 // and check the end for plain whitespace (which can't be faithfully kept in a plain scalar)
141 if(!str
.empty() && *str
.rbegin() == ' ')
144 // then check until something is disallowed
// Union of every pattern that may not appear inside the scalar. The
// expression is a temporary bound to a const reference.
145 const RegEx
& disallowed
= (inFlow
? Exp::EndScalarInFlow() : Exp::EndScalar())
146 || (Exp::BlankOrBreak() + Exp::Comment())
147 || Exp::NotPrintable()
148 || Exp::Utf8_ByteOrderMark()
// Character source over the bytes of `str`, used for the matches below.
151 StringCharSource
buffer(str
.c_str(), str
.size());
153 if(disallowed
.Matches(buffer
))
// The cast to unsigned char keeps bytes >= 0x80 from comparing as negative.
155 if(allowOnlyAscii
&& (0x7F < static_cast<unsigned char>(buffer
[0])))
// Builds a backslash escape containing the hexadecimal digits of
// `codePoint` in a local buffer. The statement that sends the buffer to
// `out` is not visible in this extract.
//
// out:       destination stream.
// codePoint: value to escape.
163 void WriteDoubleQuoteEscapeSequence(ostream
& out
, int codePoint
) {
164 static const char hexDigits
[] = "0123456789abcdef";
// Sized for the longest form: backslash, 'U', eight hex digits, and NUL.
166 char escSeq
[] = "\\U00000000";
// Branch on magnitude; what each branch sets is not visible here (presumably
// the digit count and the escape letter -- TODO confirm).
// NOTE(review): both bounds are exclusive, so 0xFF and 0xFFFF themselves fall
// through to the next wider branch.
168 if (codePoint
< 0xFF) {
171 } else if (codePoint
< 0xFFFF) {
176 // Write digits into the escape sequence
// Emits one hex digit per iteration, most significant nibble first.
178 for (; digits
> 0; --digits
, ++i
) {
179 escSeq
[i
] = hexDigits
[(codePoint
>> (4 * (digits
- 1))) & 0xF];
182 escSeq
[i
] = 0; // terminate with NUL character
// Walks `str` one decoded code point at a time, tests each with
// IsAnchorChar, and writes code points to `out` via WriteCodePoint.
// The statement taken when IsAnchorChar fails, and the return statements,
// are not visible in this extract.
//
// out: destination stream.
// str: UTF-8 encoded name.
186 bool WriteAliasName(ostream
& out
, const std::string
& str
) {
// The decoder advances `i` itself; its result is the loop condition.
188 for(std::string::const_iterator i
= str
.begin();
189 GetNextCodePointAndAdvance(codePoint
, i
, str
.end());
192 if (!IsAnchorChar(codePoint
))
195 WriteCodePoint(out
, codePoint
);
// Writes `str` to `out` as a scalar. It first asks IsValidPlainScalar whether
// the text may be written plain (the body of that branch is not visible in
// this extract); a later visible statement returns the result of
// WriteDoubleQuotedString.
//
// out:            destination stream.
// str:            scalar text.
// inFlow:         forwarded to IsValidPlainScalar.
// escapeNonAscii: forwarded both as IsValidPlainScalar's third argument and
//                 to WriteDoubleQuotedString.
201 bool WriteString(ostream
& out
, const std::string
& str
, bool inFlow
, bool escapeNonAscii
)
203 if(IsValidPlainScalar(str
, inFlow
, escapeNonAscii
)) {
207 return WriteDoubleQuotedString(out
, str
, escapeNonAscii
);
// Writes `str` to `out` code point by code point for single-quoted output.
// Returns false as soon as a line feed is met (see the note on that line).
// The handling of an embedded single quote and the final return are not
// visible in this extract.
//
// out: destination stream.
// str: UTF-8 encoded scalar text.
210 bool WriteSingleQuotedString(ostream
& out
, const std::string
& str
)
// The decoder advances `i` itself; its result is the loop condition.
214 for(std::string::const_iterator i
= str
.begin();
215 GetNextCodePointAndAdvance(codePoint
, i
, str
.end());
218 if (codePoint
== '\n')
219 return false; // We can't handle a new line and the attendant indentation yet
// A quote character inside the text gets special treatment (not shown here).
221 if (codePoint
== '\'')
224 WriteCodePoint(out
, codePoint
);
// Writes `str` to `out` code point by code point for double-quoted output,
// sending the code points that match the tests below through
// WriteDoubleQuoteEscapeSequence and others through WriteCodePoint.
// The handling of '"' and '\\', the surrounding quotes and the return
// statement are not visible in this extract.
//
// out:            destination stream.
// str:            UTF-8 encoded scalar text.
// escapeNonAscii: when set, code points above 0x7E are escaped as well.
230 bool WriteDoubleQuotedString(ostream
& out
, const std::string
& str
, bool escapeNonAscii
)
// The decoder advances `i` itself; its result is the loop condition.
234 for(std::string::const_iterator i
= str
.begin();
235 GetNextCodePointAndAdvance(codePoint
, i
, str
.end());
238 if (codePoint
== '\"')
240 else if (codePoint
== '\\')
242 else if (codePoint
< 0x20 || (codePoint
>= 0x80 && codePoint
<= 0xA0)) // Control characters and non-breaking space
243 WriteDoubleQuoteEscapeSequence(out
, codePoint
);
244 else if (codePoint
== 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2)
245 WriteDoubleQuoteEscapeSequence(out
, codePoint
);
// NOTE(review): `> 0x7E` also catches 0x7F (DEL), which is ASCII -- confirm
// that escaping it only in this mode is intended.
246 else if (escapeNonAscii
&& codePoint
> 0x7E)
247 WriteDoubleQuoteEscapeSequence(out
, codePoint
);
249 WriteCodePoint(out
, codePoint
);
// Writes `str` to `out` with the output positioned via IndentTo(indent):
// once before the text, and again after each line feed found in it.
// Code points are written with WriteCodePoint. Any header written before
// the first indent, and the return statement, are not visible in this
// extract.
//
// out:    destination stream.
// str:    UTF-8 encoded text.
// indent: value passed to IndentTo.
255 bool WriteLiteralString(ostream
& out
, const std::string
& str
, int indent
)
258 out
<< IndentTo(indent
);
// The decoder advances `i` itself; its result is the loop condition.
260 for(std::string::const_iterator i
= str
.begin();
261 GetNextCodePointAndAdvance(codePoint
, i
, str
.end());
// Re-establish the indent at the start of every new line.
264 if (codePoint
== '\n')
265 out
<< "\n" << IndentTo(indent
);
267 WriteCodePoint(out
, codePoint
);
// Writes the single character `ch` to `out`. ASCII letters take the first
// branch (its body is not visible in this extract); other characters in
// 0x20-0x7e are wrapped in double quotes; a later visible statement passes
// the character to WriteDoubleQuoteEscapeSequence. The intermediate branches
// and the return statement are not shown.
//
// out: destination stream.
// ch:  character to write.
272 bool WriteChar(ostream
& out
, char ch
)
274 if(('a' <= ch
&& ch
<= 'z') || ('A' <= ch
&& ch
<= 'Z'))
// NOTE(review): ' ' is 0x20, so the trailing `ch == ' '` test is already
// covered by the range check before it.
276 else if((0x20 <= ch
&& ch
<= 0x7e) || ch
== ' ')
277 out
<< "\"" << ch
<< "\"";
// NOTE(review): `ch` is converted to int here; where char is signed, bytes
// >= 0x80 arrive as negative code points -- confirm this is intended.
286 WriteDoubleQuoteEscapeSequence(out
, ch
);
// Writes `str` to `out` as a comment: a "#" followed by
// Indentation(postCommentIndent), then the text. Each line feed in the text
// starts a new line, moves to the column recorded on entry, and repeats the
// "#" prefix. The return statement is not visible in this extract.
//
// out:               destination stream.
// str:               UTF-8 encoded comment text.
// postCommentIndent: value passed to Indentation after each "#".
292 bool WriteComment(ostream
& out
, const std::string
& str
, int postCommentIndent
)
// Column at which the comment begins; continuation lines go back to it.
294 const unsigned curIndent
= out
.col();
295 out
<< "#" << Indentation(postCommentIndent
);
// The decoder advances `i` itself; its result is the loop condition.
297 for(std::string::const_iterator i
= str
.begin();
298 GetNextCodePointAndAdvance(codePoint
, i
, str
.end());
301 if(codePoint
== '\n')
302 out
<< "\n" << IndentTo(curIndent
) << "#" << Indentation(postCommentIndent
);
304 WriteCodePoint(out
, codePoint
);
// Writes an alias to `out`; the name itself is written (and its result
// returned) by WriteAliasName. Anything emitted before the name is not
// visible in this extract.
//
// out: destination stream.
// str: alias name.
309 bool WriteAlias(ostream
& out
, const std::string
& str
)
312 return WriteAliasName(out
, str
);
// Writes an anchor to `out`; the name itself is written (and its result
// returned) by WriteAliasName. Anything emitted before the name is not
// visible in this extract.
//
// out: destination stream.
// str: anchor name.
315 bool WriteAnchor(ostream
& out
, const std::string
& str
)
318 return WriteAliasName(out
, str
);
// Writes a tag to `out`. The opening marker is "!<" for a verbatim tag and
// "!" otherwise; the text is then matched against Exp::URI() (verbatim) or
// Exp::Tag(). How the match length `n` is used, any closing marker and the
// return statements are not visible in this extract.
//
// out:      destination stream.
// str:      tag text.
// verbatim: selects the verbatim form and the URI expression.
321 bool WriteTag(ostream
& out
, const std::string
& str
, bool verbatim
)
323 out
<< (verbatim
? "!<" : "!");
// Character source over the bytes of `str`, used for the match below.
324 StringCharSource
buffer(str
.c_str(), str
.size());
325 const RegEx
& reValid
= verbatim
? Exp::URI() : Exp::Tag();
327 int n
= reValid
.Match(buffer
);
// Writes a tag made of `prefix` and `tag` to `out`. The prefix is matched
// against Exp::URI() and copied out character by character while its source
// is non-empty; the tag part is then matched against Exp::Tag(). Handling of
// the match lengths, the separators between the parts and the return
// statements are not visible in this extract.
//
// out:    destination stream.
// prefix: tag prefix text.
// tag:    tag text.
341 bool WriteTagWithPrefix(ostream
& out
, const std::string
& prefix
, const std::string
& tag
)
344 StringCharSource
prefixBuffer(prefix
.c_str(), prefix
.size());
// Loops for as long as the source converts to true.
345 while(prefixBuffer
) {
346 int n
= Exp::URI().Match(prefixBuffer
);
351 out
<< prefixBuffer
[0];
357 StringCharSource
tagBuffer(tag
.c_str(), tag
.size());
359 int n
= Exp::Tag().Match(tagBuffer
);
// Writes `binary` to `out` as a double-quoted string holding its Base64
// encoding. Non-ASCII escaping is switched off (third argument `false`).
// NOTE(review): the result of WriteDoubleQuotedString is discarded on the
// visible line; the value this function returns is not visible in this
// extract.
//
// out:    destination stream.
// binary: data to encode; read through data() and size().
371 bool WriteBinary(ostream
& out
, const Binary
& binary
)
373 WriteDoubleQuotedString(out
, EncodeBase64(binary
.data(), binary
.size()), false);