1 #include "emitterutils.h"
3 #include "indentation.h"
4 #include "yaml-cpp/exceptions.h"
5 #include "stringsource.h"
// Code point (U+FFFD) that the decoding and encoding helpers in this file assign
// in place of input that cannot be decoded or that is not a legal code point.
14 enum {REPLACEMENT_CHARACTER
= 0xFFFD};
// Predicate over a single code point `ch`, used to vet the characters of
// anchor/alias names (YAML production ns-anchor-char).
// NOTE(review): the result returned for each group below is not visible in this
// view; presumably the listed characters and ranges are the rejected ones -- confirm.
16 bool IsAnchorChar(int ch
) { // test for ns-anchor-char
// Individually listed characters: flow indicators, white space, BOM, line breaks.
18 case ',': case '[': case ']': case '{': case '}': // c-flow-indicator
19 case ' ': case '\t': // s-white
20 case 0xFEFF: // c-byte-order-mark
21 case 0xA: case 0xD: // b-char
// UTF-16 surrogate range U+D800..U+DFFF.
35 if (ch
>= 0xD800 && ch
<= 0xDFFF)
// Any code point whose low 16 bits are 0xFFFE or 0xFFFF.
37 if ((ch
& 0xFFFE) == 0xFFFE)
// The range U+FDD0..U+FDEF.
39 if ((ch
>= 0xFDD0) && (ch
<= 0xFDEF))
// Classifies a UTF-8 lead byte by its top four bits. The caller in this file
// stores the result as the number of bytes in the encoded sequence.
47 int Utf8BytesIndicated(char ch
) {
// Convert through unsigned char so that bytes >= 0x80 are not sign-extended.
48 int byteVal
= static_cast<unsigned char>(ch
);
// Dispatch on the high nibble of the byte.
49 switch (byteVal
>> 4) {
// High nibble 0..7 means the top bit is clear, i.e. a 7-bit byte.
50 case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
// True when the two most significant bits of `ch` are 10, which is the bit
// pattern of a UTF-8 continuation (trailing) byte.
63 bool IsTrailingByte(char ch
) {
64 return (ch
& 0xC0) == 0x80;
// Decodes one UTF-8 encoded code point beginning at `first` into `codePoint`.
// `first` is taken by reference and is advanced as bytes are consumed; `last`
// marks the end of the input. Broken sequences and illegal code points are
// reported as REPLACEMENT_CHARACTER.
// NOTE(review): the return statements are not visible in this view; callers in
// this file use the result as a loop condition.
67 bool GetNextCodePointAndAdvance(int& codePoint
, std::string::const_iterator
& first
, std::string::const_iterator last
) {
// Sequence length derived from the lead byte.
71 int nBytes
= Utf8BytesIndicated(*first
);
// NOTE(review): presumably the path taken for an invalid lead byte -- the
// guarding condition is not visible here.
75 codePoint
= REPLACEMENT_CHARACTER
;
84 // Gather bits from trailing bytes
// Mask away the high marker bits of the lead byte, keeping its low (7 - nBytes) bits.
85 codePoint
= static_cast<unsigned char>(*first
) & ~(0xFF << (7 - nBytes
));
// One iteration per byte still expected in the sequence.
88 for (; nBytes
> 0; ++first
, --nBytes
) {
// Input ended early, or the next byte is not of the form 10xxxxxx.
89 if ((first
== last
) || !IsTrailingByte(*first
)) {
90 codePoint
= REPLACEMENT_CHARACTER
;
// Merge the six low bits carried by the continuation byte.
94 codePoint
|= *first
& 0x3F;
97 // Check for illegal code points
// Above the last Unicode code point.
98 if (codePoint
> 0x10FFFF)
99 codePoint
= REPLACEMENT_CHARACTER
;
// UTF-16 surrogate range.
100 else if (codePoint
>= 0xD800 && codePoint
<= 0xDFFF)
101 codePoint
= REPLACEMENT_CHARACTER
;
// Low 16 bits equal to 0xFFFE or 0xFFFF.
102 else if ((codePoint
& 0xFFFE) == 0xFFFE)
103 codePoint
= REPLACEMENT_CHARACTER
;
// The range U+FDD0..U+FDEF.
104 else if (codePoint
>= 0xFDD0 && codePoint
<= 0xFDEF)
105 codePoint
= REPLACEMENT_CHARACTER
;
109 void WriteCodePoint(ostream
& out
, int codePoint
) {
110 if (codePoint
< 0 || codePoint
> 0x10FFFF) {
111 codePoint
= REPLACEMENT_CHARACTER
;
113 if (codePoint
< 0x7F) {
114 out
<< static_cast<char>(codePoint
);
115 } else if (codePoint
< 0x7FF) {
116 out
<< static_cast<char>(0xC0 | (codePoint
>> 6))
117 << static_cast<char>(0x80 | (codePoint
& 0x3F));
118 } else if (codePoint
< 0xFFFF) {
119 out
<< static_cast<char>(0xE0 | (codePoint
>> 12))
120 << static_cast<char>(0x80 | ((codePoint
>> 6) & 0x3F))
121 << static_cast<char>(0x80 | (codePoint
& 0x3F));
123 out
<< static_cast<char>(0xF0 | (codePoint
>> 18))
124 << static_cast<char>(0x80 | ((codePoint
>> 12) & 0x3F))
125 << static_cast<char>(0x80 | ((codePoint
>> 6) & 0x3F))
126 << static_cast<char>(0x80 | (codePoint
& 0x3F));
// Checks whether `str` can be written as a plain (unquoted) scalar.
// `inFlow` selects the flow-context variants of the expressions used below;
// `allowOnlyAscii` enables an additional test for bytes above 0x7F.
// NOTE(review): the return statements are not visible in this view.
130 bool IsValidPlainScalar(const std::string
& str
, bool inFlow
, bool allowOnlyAscii
) {
131 // first check the start
132 const RegEx
& start
= (inFlow
? Exp::PlainScalarInFlow() : Exp::PlainScalar());
133 if(!start
.Matches(str
))
136 // and check the end for plain whitespace (which can't be faithfully kept in a plain scalar)
137 if(!str
.empty() && *str
.rbegin() == ' ')
140 // then check until something is disallowed
// Union of everything that may not occur inside the scalar: end-of-scalar
// markers, a comment after blank/break, non-printable characters, a UTF-8 BOM.
141 const RegEx
& disallowed
= (inFlow
? Exp::EndScalarInFlow() : Exp::EndScalar())
142 || (Exp::BlankOrBreak() + Exp::Comment())
143 || Exp::NotPrintable()
144 || Exp::Utf8_ByteOrderMark()
// Character source over the bytes of `str`, examined by the tests below.
147 StringCharSource
buffer(str
.c_str(), str
.size());
// Something from the disallowed set matches at the current position.
149 if(disallowed
.Matches(buffer
))
// Current byte lies outside 7-bit ASCII while only ASCII is permitted.
151 if(allowOnlyAscii
&& (0x7F < static_cast<unsigned char>(buffer
[0])))
// Builds a backslash hex escape for `codePoint` in a local buffer, choosing the
// form by the magnitude of the code point.
// NOTE(review): the branch bodies and the final write to `out` are not visible
// in this view. The bounds below are strict, so 0xFF and 0xFFFF themselves are
// handled by the next wider branch.
159 void WriteDoubleQuoteEscapeSequence(ostream
& out
, int codePoint
) {
// Maps a 4-bit value to its lowercase hexadecimal digit.
160 static const char hexDigits
[] = "0123456789abcdef";
// Scratch buffer sized for the longest form: backslash, 'U' and eight digits.
162 char escSeq
[] = "\\U00000000";
164 if (codePoint
< 0xFF) {
167 } else if (codePoint
< 0xFFFF) {
172 // Write digits into the escape sequence
// Most significant digit first: the shift shrinks by four bits per iteration.
174 for (; digits
> 0; --digits
, ++i
) {
175 escSeq
[i
] = hexDigits
[(codePoint
>> (4 * (digits
- 1))) & 0xF];
178 escSeq
[i
] = 0; // terminate with NUL character
// Walks `str` one decoded code point at a time, tests each with IsAnchorChar
// and passes code points to WriteCodePoint for output on `out`.
// NOTE(review): what happens when IsAnchorChar rejects a code point, and the
// final return value, are not visible in this view.
182 bool WriteAliasName(ostream
& out
, const std::string
& str
) {
// The decoder call is the loop condition; it also advances the iterator `i`.
184 for(std::string::const_iterator i
= str
.begin();
185 GetNextCodePointAndAdvance(codePoint
, i
, str
.end());
// Code point that fails the anchor-character test.
188 if (!IsAnchorChar(codePoint
))
191 WriteCodePoint(out
, codePoint
);
// Writes `str` as a scalar. IsValidPlainScalar is consulted first, with
// `escapeNonAscii` passed as its ASCII-only flag; the visible fallback is
// WriteDoubleQuotedString.
// NOTE(review): the body of the plain-scalar branch is not visible in this view.
197 bool WriteString(ostream
& out
, const std::string
& str
, bool inFlow
, bool escapeNonAscii
)
199 if(IsValidPlainScalar(str
, inFlow
, escapeNonAscii
)) {
// Fallback: emit the string in double-quoted form and forward its result.
203 return WriteDoubleQuotedString(out
, str
, escapeNonAscii
);
// Writes `str` to `out` code point by code point for a single-quoted scalar.
// Returns false when the string contains a line feed.
// NOTE(review): the handling of an embedded single quote and the surrounding
// quote output are not visible in this view.
206 bool WriteSingleQuotedString(ostream
& out
, const std::string
& str
)
// The decoder call is the loop condition; it also advances the iterator `i`.
210 for(std::string::const_iterator i
= str
.begin();
211 GetNextCodePointAndAdvance(codePoint
, i
, str
.end());
214 if (codePoint
== '\n')
215 return false; // We can't handle a new line and the attendant indentation yet
// An embedded single quote is treated as a special case.
217 if (codePoint
== '\'')
220 WriteCodePoint(out
, codePoint
);
// Writes `str` to `out` code point by code point for a double-quoted scalar.
// Control characters, U+0080..U+00A0 and U+FEFF go through
// WriteDoubleQuoteEscapeSequence; with `escapeNonAscii` set, so does every code
// point above 0x7E. Other code points go to WriteCodePoint.
// NOTE(review): the bodies of the '"' and '\\' branches and the surrounding
// quote output are not visible in this view.
226 bool WriteDoubleQuotedString(ostream
& out
, const std::string
& str
, bool escapeNonAscii
)
// The decoder call is the loop condition; it also advances the iterator `i`.
230 for(std::string::const_iterator i
= str
.begin();
231 GetNextCodePointAndAdvance(codePoint
, i
, str
.end());
// The quote character itself and the backslash are special-cased first.
234 if (codePoint
== '\"')
236 else if (codePoint
== '\\')
238 else if (codePoint
< 0x20 || (codePoint
>= 0x80 && codePoint
<= 0xA0)) // Control characters and non-breaking space
239 WriteDoubleQuoteEscapeSequence(out
, codePoint
);
240 else if (codePoint
== 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2)
241 WriteDoubleQuoteEscapeSequence(out
, codePoint
);
// ASCII-only output requested: escape everything past '~' (0x7E).
242 else if (escapeNonAscii
&& codePoint
> 0x7E)
243 WriteDoubleQuoteEscapeSequence(out
, codePoint
);
245 WriteCodePoint(out
, codePoint
);
// Writes `str` to `out` code point by code point, moving to column `indent`
// before the text and again after every line feed.
// NOTE(review): any block-scalar header and the return value are not visible
// in this view.
251 bool WriteLiteralString(ostream
& out
, const std::string
& str
, int indent
)
// Indent ahead of the first line of content.
254 out
<< IndentTo(indent
);
// The decoder call is the loop condition; it also advances the iterator `i`.
256 for(std::string::const_iterator i
= str
.begin();
257 GetNextCodePointAndAdvance(codePoint
, i
, str
.end());
// A line feed is written as a newline followed by the indentation.
260 if (codePoint
== '\n')
261 out
<< "\n" << IndentTo(indent
);
263 WriteCodePoint(out
, codePoint
);
// Writes `str` to `out` as a comment: a '#' followed by `postCommentIndent`
// worth of indentation, then the text. After each line feed the '#' prefix is
// repeated at the column where the comment started.
// NOTE(review): the return value is not visible in this view.
268 bool WriteComment(ostream
& out
, const std::string
& str
, int postCommentIndent
)
// Remember the starting column so continuation lines can be aligned with it.
270 unsigned curIndent
= out
.col();
271 out
<< "#" << Indentation(postCommentIndent
);
// The decoder call is the loop condition; it also advances the iterator `i`.
273 for(std::string::const_iterator i
= str
.begin();
274 GetNextCodePointAndAdvance(codePoint
, i
, str
.end());
// Line feed: start a new comment line at the remembered column.
277 if(codePoint
== '\n')
278 out
<< "\n" << IndentTo(curIndent
) << "#" << Indentation(postCommentIndent
);
280 WriteCodePoint(out
, codePoint
);
// Writes an alias whose name is `str`; the name itself is emitted and checked
// by WriteAliasName, whose result is returned.
// NOTE(review): any indicator written before the name is not visible in this view.
285 bool WriteAlias(ostream
& out
, const std::string
& str
)
288 return WriteAliasName(out
, str
);
// Writes an anchor whose name is `str`; the name itself is emitted and checked
// by WriteAliasName, whose result is returned.
// NOTE(review): any indicator written before the name is not visible in this view.
291 bool WriteAnchor(ostream
& out
, const std::string
& str
)
294 return WriteAliasName(out
, str
);
// Writes the tag `str` to `out`. A verbatim tag is opened with "!<" and matched
// against Exp::URI(); otherwise the tag is opened with "!" and matched against
// Exp::Tag().
// NOTE(review): the loop that consumes the buffer, the closing of a verbatim
// tag and the return value are not visible in this view.
297 bool WriteTag(ostream
& out
, const std::string
& str
, bool verbatim
)
299 out
<< (verbatim
? "!<" : "!");
// Character source over the bytes of the tag text.
300 StringCharSource
buffer(str
.c_str(), str
.size());
// Expression that the tag text is matched against.
301 const RegEx
& reValid
= verbatim
? Exp::URI() : Exp::Tag();
// Result of matching at the current buffer position.
303 int n
= reValid
.Match(buffer
);
// Writes a tag made of `prefix` and `tag`. The prefix is matched against
// Exp::URI() while its buffer has input left, and its characters are written to
// `out`; the tag part is matched against Exp::Tag().
// NOTE(review): the handling of a failed match, the separators written between
// the parts and the return value are not visible in this view.
317 bool WriteTagWithPrefix(ostream
& out
, const std::string
& prefix
, const std::string
& tag
)
// Character source over the bytes of the prefix.
320 StringCharSource
prefixBuffer(prefix
.c_str(), prefix
.size());
// Loop for as long as the prefix buffer tests true.
321 while(prefixBuffer
) {
322 int n
= Exp::URI().Match(prefixBuffer
);
// Emit the character at the current position of the prefix buffer.
327 out
<< prefixBuffer
[0];
// Character source over the bytes of the tag part.
333 StringCharSource
tagBuffer(tag
.c_str(), tag
.size());
335 int n
= Exp::Tag().Match(tagBuffer
);
// Writes `size` bytes starting at `data` to `out` as base64 text: each full
// group of three input bytes becomes four characters taken from `encoding`.
// NOTE(review): `data` is `const char*`. Where plain char is signed, a byte
// >= 0x80 is negative, so `data[k] >> n` can yield a negative index into
// `encoding`. Reading the bytes as unsigned char would avoid that -- confirm.
// NOTE(review): the quoting around the output, the use of PAD and the return
// value are not visible in this view.
347 bool WriteBinary(ostream
& out
, const char *data
, std::size_t size
)
// The 64-character base64 alphabet, indexed by a 6-bit value.
349 static const char encoding
[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
350 const char PAD
= '=';
// Number of complete 3-byte groups, and the 0..2 bytes left over after them.
353 std::size_t chunks
= size
/ 3;
354 std::size_t remainder
= size
% 3;
// Full groups: 24 input bits are split into four 6-bit indices.
356 for(std::size_t i
=0;i
<chunks
;i
++, data
+= 3) {
357 out
<< encoding
[data
[0] >> 2];
358 out
<< encoding
[((data
[0] & 0x3) << 4) | (data
[1] >> 4)];
359 out
<< encoding
[((data
[1] & 0xf) << 2) | (data
[2] >> 6)];
360 out
<< encoding
[data
[2] & 0x3f];
// Only data[0] is read here -- appears to be the tail case of one leftover byte.
367 out
<< encoding
[data
[0] >> 2];
368 out
<< encoding
[((data
[0] & 0x3) << 4)];
// data[0] and data[1] are read here -- appears to be the tail case of two leftover bytes.
373 out
<< encoding
[data
[0] >> 2];
374 out
<< encoding
[((data
[0] & 0x3) << 4) | (data
[1] >> 4)];
375 out
<< encoding
[((data
[1] & 0xf) << 2)];