Merge pull request #506 from andrewcsmith/patch-2
[supercollider.git] / external_libraries / yaml-cpp-0.3.0 / src / emitterutils.cpp
blob3d184d6cb95d599ecd5ee1a31cb519747ed39e84
1 #include "emitterutils.h"
2 #include "exp.h"
3 #include "indentation.h"
4 #include "yaml-cpp/binary.h"
5 #include "yaml-cpp/exceptions.h"
6 #include "stringsource.h"
7 #include <sstream>
8 #include <iomanip>
10 namespace YAML
12 namespace Utils
14 namespace {
// Code point (U+FFFD) substituted wherever the UTF-8 helpers in this file
// meet a malformed byte sequence or an illegal code point.
enum {
    REPLACEMENT_CHARACTER = 0xFFFD
};
// Tests whether `ch` (a Unicode code point) may appear in an anchor or alias
// name, i.e. whether it is an ns-anchor-char: printable, not whitespace, not a
// line break, not a byte order mark and not a flow indicator.
//
// Returns true when the code point is allowed, false otherwise.
bool IsAnchorChar(int ch) {
    switch (ch) {
        case ',': case '[': case ']': case '{': case '}': // c-flow-indicator
        case ' ': case '\t': // s-white
        case 0xFEFF: // c-byte-order-mark
        case 0xA: case 0xD: // b-char
            return false;
        case 0x85: // the one value in 0x7F..0x9F that is accepted
            return true;
    }

    // C0 control characters (and any negative value)
    if (ch < 0x20)
        return false;

    // Printable ASCII. The upper bound is inclusive so that '~' (0x7E) is
    // accepted; only DEL (0x7F) falls through to the rejection below.
    if (ch <= 0x7E)
        return true;

    // DEL and the remaining C1 control characters
    if (ch < 0xA0)
        return false;
    // UTF-16 surrogate halves
    if (ch >= 0xD800 && ch <= 0xDFFF)
        return false;
    // Code points whose low 16 bits are 0xFFFE or 0xFFFF
    if ((ch & 0xFFFE) == 0xFFFE)
        return false;
    if ((ch >= 0xFDD0) && (ch <= 0xFDEF))
        return false;
    // Beyond the last Unicode code point
    if (ch > 0x10FFFF)
        return false;

    return true;
}
// Returns the total length in bytes (1 to 4) of the UTF-8 sequence that the
// lead byte `ch` announces, or -1 when `ch` cannot begin a sequence.
int Utf8BytesIndicated(char ch) {
    // Go through unsigned char so bytes >= 0x80 are not sign-extended.
    const int byteVal = static_cast<unsigned char>(ch);
    switch (byteVal >> 4) {
        case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
            return 1; // 0xxxxxxx
        case 12: case 13:
            return 2; // 110xxxxx
        case 14:
            return 3; // 1110xxxx
        case 15:
            // Only 11110xxx (0xF0..0xF7) starts a 4-byte sequence; 0xF8..0xFF
            // never occur in UTF-8 and are reported as bad lead bytes.
            return (byteVal < 0xF8) ? 4 : -1;
        default:
            return -1; // 10xxxxxx: a trailing byte, not a lead byte
    }
}
// True when the top two bits of `ch` are "10", i.e. the byte has the form
// 10xxxxxx.
bool IsTrailingByte(char ch) {
    const int topTwoBits = ch & 0xC0;
    return topTwoBits == 0x80;
}
68 bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) {
69 if (first == last)
70 return false;
72 int nBytes = Utf8BytesIndicated(*first);
73 if (nBytes < 1) {
74 // Bad lead byte
75 ++first;
76 codePoint = REPLACEMENT_CHARACTER;
77 return true;
80 if (nBytes == 1) {
81 codePoint = *first++;
82 return true;
85 // Gather bits from trailing bytes
86 codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
87 ++first;
88 --nBytes;
89 for (; nBytes > 0; ++first, --nBytes) {
90 if ((first == last) || !IsTrailingByte(*first)) {
91 codePoint = REPLACEMENT_CHARACTER;
92 break;
94 codePoint <<= 6;
95 codePoint |= *first & 0x3F;
98 // Check for illegal code points
99 if (codePoint > 0x10FFFF)
100 codePoint = REPLACEMENT_CHARACTER;
101 else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
102 codePoint = REPLACEMENT_CHARACTER;
103 else if ((codePoint & 0xFFFE) == 0xFFFE)
104 codePoint = REPLACEMENT_CHARACTER;
105 else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
106 codePoint = REPLACEMENT_CHARACTER;
107 return true;
110 void WriteCodePoint(ostream& out, int codePoint) {
111 if (codePoint < 0 || codePoint > 0x10FFFF) {
112 codePoint = REPLACEMENT_CHARACTER;
114 if (codePoint < 0x7F) {
115 out << static_cast<char>(codePoint);
116 } else if (codePoint < 0x7FF) {
117 out << static_cast<char>(0xC0 | (codePoint >> 6))
118 << static_cast<char>(0x80 | (codePoint & 0x3F));
119 } else if (codePoint < 0xFFFF) {
120 out << static_cast<char>(0xE0 | (codePoint >> 12))
121 << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
122 << static_cast<char>(0x80 | (codePoint & 0x3F));
123 } else {
124 out << static_cast<char>(0xF0 | (codePoint >> 18))
125 << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
126 << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
127 << static_cast<char>(0x80 | (codePoint & 0x3F));
131 bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) {
132 if(str.empty())
133 return false;
135 // first check the start
136 const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar());
137 if(!start.Matches(str))
138 return false;
140 // and check the end for plain whitespace (which can't be faithfully kept in a plain scalar)
141 if(!str.empty() && *str.rbegin() == ' ')
142 return false;
144 // then check until something is disallowed
145 const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow() : Exp::EndScalar())
146 || (Exp::BlankOrBreak() + Exp::Comment())
147 || Exp::NotPrintable()
148 || Exp::Utf8_ByteOrderMark()
149 || Exp::Break()
150 || Exp::Tab();
151 StringCharSource buffer(str.c_str(), str.size());
152 while(buffer) {
153 if(disallowed.Matches(buffer))
154 return false;
155 if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0])))
156 return false;
157 ++buffer;
160 return true;
163 void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) {
164 static const char hexDigits[] = "0123456789abcdef";
166 char escSeq[] = "\\U00000000";
167 int digits = 8;
168 if (codePoint < 0xFF) {
169 escSeq[1] = 'x';
170 digits = 2;
171 } else if (codePoint < 0xFFFF) {
172 escSeq[1] = 'u';
173 digits = 4;
176 // Write digits into the escape sequence
177 int i = 2;
178 for (; digits > 0; --digits, ++i) {
179 escSeq[i] = hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF];
182 escSeq[i] = 0; // terminate with NUL character
183 out << escSeq;
186 bool WriteAliasName(ostream& out, const std::string& str) {
187 int codePoint;
188 for(std::string::const_iterator i = str.begin();
189 GetNextCodePointAndAdvance(codePoint, i, str.end());
192 if (!IsAnchorChar(codePoint))
193 return false;
195 WriteCodePoint(out, codePoint);
197 return true;
201 bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii)
203 if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) {
204 out << str;
205 return true;
206 } else
207 return WriteDoubleQuotedString(out, str, escapeNonAscii);
210 bool WriteSingleQuotedString(ostream& out, const std::string& str)
212 out << "'";
213 int codePoint;
214 for(std::string::const_iterator i = str.begin();
215 GetNextCodePointAndAdvance(codePoint, i, str.end());
218 if (codePoint == '\n')
219 return false; // We can't handle a new line and the attendant indentation yet
221 if (codePoint == '\'')
222 out << "''";
223 else
224 WriteCodePoint(out, codePoint);
226 out << "'";
227 return true;
230 bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii)
232 out << "\"";
233 int codePoint;
234 for(std::string::const_iterator i = str.begin();
235 GetNextCodePointAndAdvance(codePoint, i, str.end());
238 if (codePoint == '\"')
239 out << "\\\"";
240 else if (codePoint == '\\')
241 out << "\\\\";
242 else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0)) // Control characters and non-breaking space
243 WriteDoubleQuoteEscapeSequence(out, codePoint);
244 else if (codePoint == 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2)
245 WriteDoubleQuoteEscapeSequence(out, codePoint);
246 else if (escapeNonAscii && codePoint > 0x7E)
247 WriteDoubleQuoteEscapeSequence(out, codePoint);
248 else
249 WriteCodePoint(out, codePoint);
251 out << "\"";
252 return true;
255 bool WriteLiteralString(ostream& out, const std::string& str, int indent)
257 out << "|\n";
258 out << IndentTo(indent);
259 int codePoint;
260 for(std::string::const_iterator i = str.begin();
261 GetNextCodePointAndAdvance(codePoint, i, str.end());
264 if (codePoint == '\n')
265 out << "\n" << IndentTo(indent);
266 else
267 WriteCodePoint(out, codePoint);
269 return true;
272 bool WriteChar(ostream& out, char ch)
274 if(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'))
275 out << ch;
276 else if((0x20 <= ch && ch <= 0x7e) || ch == ' ')
277 out << "\"" << ch << "\"";
278 else if(ch == '\t')
279 out << "\"\\t\"";
280 else if(ch == '\n')
281 out << "\"\\n\"";
282 else if(ch == '\b')
283 out << "\"\\b\"";
284 else {
285 out << "\"";
286 WriteDoubleQuoteEscapeSequence(out, ch);
287 out << "\"";
289 return true;
292 bool WriteComment(ostream& out, const std::string& str, int postCommentIndent)
294 const unsigned curIndent = out.col();
295 out << "#" << Indentation(postCommentIndent);
296 int codePoint;
297 for(std::string::const_iterator i = str.begin();
298 GetNextCodePointAndAdvance(codePoint, i, str.end());
301 if(codePoint == '\n')
302 out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent);
303 else
304 WriteCodePoint(out, codePoint);
306 return true;
309 bool WriteAlias(ostream& out, const std::string& str)
311 out << "*";
312 return WriteAliasName(out, str);
315 bool WriteAnchor(ostream& out, const std::string& str)
317 out << "&";
318 return WriteAliasName(out, str);
321 bool WriteTag(ostream& out, const std::string& str, bool verbatim)
323 out << (verbatim ? "!<" : "!");
324 StringCharSource buffer(str.c_str(), str.size());
325 const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
326 while(buffer) {
327 int n = reValid.Match(buffer);
328 if(n <= 0)
329 return false;
331 while(--n >= 0) {
332 out << buffer[0];
333 ++buffer;
336 if (verbatim)
337 out << ">";
338 return true;
341 bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag)
343 out << "!";
344 StringCharSource prefixBuffer(prefix.c_str(), prefix.size());
345 while(prefixBuffer) {
346 int n = Exp::URI().Match(prefixBuffer);
347 if(n <= 0)
348 return false;
350 while(--n >= 0) {
351 out << prefixBuffer[0];
352 ++prefixBuffer;
356 out << "!";
357 StringCharSource tagBuffer(tag.c_str(), tag.size());
358 while(tagBuffer) {
359 int n = Exp::Tag().Match(tagBuffer);
360 if(n <= 0)
361 return false;
363 while(--n >= 0) {
364 out << tagBuffer[0];
365 ++tagBuffer;
368 return true;
371 bool WriteBinary(ostream& out, const Binary& binary)
373 WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()), false);
374 return true;