external_libraries/yaml-cpp-0.2.6/src/emitterutils.cpp

   1 #include "emitterutils.h"
   2 #include "exp.h"
   3 #include "indentation.h"
   4 #include "yaml-cpp/exceptions.h"
   5 #include "stringsource.h"
   6 #include <sstream>
   7 #include <iomanip>
   8
   9 namespace YAML
  10 {
  11         namespace Utils
  12         {
  13                 namespace {
  // U+FFFD: the code point substituted whenever input cannot be used as-is
  enum { REPLACEMENT_CHARACTER = 0xFFFD };
  15
  // Tests a code point against YAML's ns-anchor-char production: a printable,
  // non-whitespace character that is neither a line break, a byte order mark,
  // nor a flow indicator.
  //
  // ch: the Unicode code point to classify.
  // Returns true when ch may appear in an anchor or alias name.
  bool IsAnchorChar(int ch) { // test for ns-anchor-char
      switch (ch) {
          case ',': case '[': case ']': case '{': case '}': // c-flow-indicator
          case ' ': case '\t': // s-white
          case 0xFEFF: // c-byte-order-mark
          case 0xA: case 0xD: // b-char
              return false;
          case 0x85: // NEL: the only value in 0x7F..0x9F that is accepted
              return true;
      }

      // C0 control characters (and anything negative)
      if (ch < 0x20)
          return false;

      // printable ASCII runs through '~' (0x7E) inclusive
      if (ch <= 0x7E)
          return true;

      // DEL and the C1 control range
      if (ch < 0xA0)
          return false;
      // UTF-16 surrogates are not characters
      if (ch >= 0xD800 && ch <= 0xDFFF)
          return false;
      // noncharacters whose low 16 bits are FFFE or FFFF
      if ((ch & 0xFFFE) == 0xFFFE)
          return false;
      // noncharacter block U+FDD0..U+FDEF
      if ((ch >= 0xFDD0) && (ch <= 0xFDEF))
          return false;
      // beyond the last Unicode code point
      if (ch > 0x10FFFF)
          return false;

      return true;
  }
  46
  // Reports how many bytes a UTF-8 sequence occupies, judging only by its
  // first byte.
  //
  // ch: the candidate lead byte.
  // Returns 1-4 for a lead byte, or -1 when ch cannot start a sequence
  // (a continuation byte 0x80..0xBF, or 0xF8..0xFF which UTF-8 never uses).
  int Utf8BytesIndicated(char ch) {
      // go through unsigned char so bytes >= 0x80 are not seen as negative
      const int byteVal = static_cast<unsigned char>(ch);

      if (byteVal < 0x80)
          return 1;  // 0xxxxxxx
      if (byteVal < 0xC0)
          return -1; // 10xxxxxx: continuation byte, not a lead
      if (byteVal < 0xE0)
          return 2;  // 110xxxxx
      if (byteVal < 0xF0)
          return 3;  // 1110xxxx
      if (byteVal < 0xF8)
          return 4;  // 11110xxx

      // 11111xxx would announce 5+ byte forms, which do not exist in UTF-8
      return -1;
  }
  62
  // True when ch has the bit pattern 10xxxxxx, i.e. a UTF-8 continuation byte.
  bool IsTrailingByte(char ch) {
      const int topTwoBits = static_cast<unsigned char>(ch) & 0xC0;
      return topTwoBits == 0x80;
  }
  66
  67                         bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) {
  68                                 if (first == last)
  69                                         return false;
  70
  71                                 int nBytes = Utf8BytesIndicated(*first);
  72                                 if (nBytes < 1) {
  73                                         // Bad lead byte
  74                                         ++first;
  75                                         codePoint = REPLACEMENT_CHARACTER;
  76                                         return true;
  77                                 }
  78
  79                                 if (nBytes == 1) {
  80                                         codePoint = *first++;
  81                                         return true;
  82                                 }
  83
  84                                 // Gather bits from trailing bytes
  85                                 codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
  86                                 ++first;
  87                                 --nBytes;
  88                                 for (; nBytes > 0; ++first, --nBytes) {
  89                                         if ((first == last) || !IsTrailingByte(*first)) {
  90                                                 codePoint = REPLACEMENT_CHARACTER;
  91                                                 break;
  92                                         }
  93                                         codePoint <<= 6;
  94                                         codePoint |= *first & 0x3F;
  95                                 }
  96
  97                                 // Check for illegal code points
  98                                 if (codePoint > 0x10FFFF)
  99                                         codePoint = REPLACEMENT_CHARACTER;
 100                                 else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
 101                                         codePoint = REPLACEMENT_CHARACTER;
 102                                 else if ((codePoint & 0xFFFE) == 0xFFFE)
 103                                         codePoint = REPLACEMENT_CHARACTER;
 104                                 else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
 105                                         codePoint = REPLACEMENT_CHARACTER;
 106                                 return true;
 107                         }
 108
 109                         void WriteCodePoint(ostream& out, int codePoint) {
 110                                 if (codePoint < 0 || codePoint > 0x10FFFF) {
 111                                         codePoint = REPLACEMENT_CHARACTER;
 112                                 }
 113                                 if (codePoint < 0x7F) {
 114                                         out << static_cast<char>(codePoint);
 115                                 } else if (codePoint < 0x7FF) {
 116                                         out << static_cast<char>(0xC0 | (codePoint >> 6))
 117                                             << static_cast<char>(0x80 | (codePoint & 0x3F));
 118                                 } else if (codePoint < 0xFFFF) {
 119                                         out << static_cast<char>(0xE0 | (codePoint >> 12))
 120                                             << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
 121                                             << static_cast<char>(0x80 | (codePoint & 0x3F));
 122                                 } else {
 123                                         out << static_cast<char>(0xF0 | (codePoint >> 18))
 124                                             << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
 125                                             << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
 126                                             << static_cast<char>(0x80 | (codePoint & 0x3F));
 127                                 }
 128                         }
 129
 130                         bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) {
 131                                 // first check the start
 132                                 const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar());
 133                                 if(!start.Matches(str))
 134                                         return false;
 135
 136                                 // and check the end for plain whitespace (which can't be faithfully kept in a plain scalar)
 137                                 if(!str.empty() && *str.rbegin() == ' ')
 138                                         return false;
 139
 140                                 // then check until something is disallowed
 141                                 const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow() : Exp::EndScalar())
 142                                                           || (Exp::BlankOrBreak() + Exp::Comment())
 143                                                           || Exp::NotPrintable()
 144                                                           || Exp::Utf8_ByteOrderMark()
 145                                                           || Exp::Break()
 146                                                           || Exp::Tab();
 147                                 StringCharSource buffer(str.c_str(), str.size());
 148                                 while(buffer) {
 149                                         if(disallowed.Matches(buffer))
 150                                                 return false;
 151                                         if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0])))
 152                                                 return false;
 153                                         ++buffer;
 154                                 }
 155
 156                                 return true;
 157                         }
 158
 // Writes `codePoint` to `out` as a double-quoted-scalar escape sequence:
 // \xNN for values below 0xFF, \uNNNN for values below 0xFFFF, and
 // \UNNNNNNNN for everything else. Hex digits are lowercase.
 void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) {
     static const char hexDigits[] = "0123456789abcdef";

     // pick the escape letter and the number of hex digits that follow it
     char marker = 'U';
     int width = 8;
     if (codePoint < 0xFF) {
         marker = 'x';
         width = 2;
     } else if (codePoint < 0xFFFF) {
         marker = 'u';
         width = 4;
     }

     // backslash + marker + up to 8 digits + terminating NUL
     char escSeq[11];
     escSeq[0] = '\\';
     escSeq[1] = marker;

     // most significant nibble first
     for (int pos = 0; pos < width; ++pos) {
         const int shift = 4 * (width - 1 - pos);
         escSeq[2 + pos] = hexDigits[(codePoint >> shift) & 0xF];
     }
     escSeq[2 + width] = '\0';

     out << escSeq;
 }
 181
 182                         bool WriteAliasName(ostream& out, const std::string& str) {
 183                                 int codePoint;
 184                                 for(std::string::const_iterator i = str.begin();
 185                                         GetNextCodePointAndAdvance(codePoint, i, str.end());
 186                                         )
 187                                 {
 188                                         if (!IsAnchorChar(codePoint))
 189                                                 return false;
 190
 191                                         WriteCodePoint(out, codePoint);
 192                                 }
 193                                 return true;
 194                         }
 195                 }
 196
 197                 bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii)
 198                 {
 199                         if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) {
 200                                 out << str;
 201                                 return true;
 202                         } else
 203                                 return WriteDoubleQuotedString(out, str, escapeNonAscii);
 204                 }
 205
 206                 bool WriteSingleQuotedString(ostream& out, const std::string& str)
 207                 {
 208                         out << "'";
 209                         int codePoint;
 210                         for(std::string::const_iterator i = str.begin();
 211                                 GetNextCodePointAndAdvance(codePoint, i, str.end());
 212                                 )
 213                         {
 214                                 if (codePoint == '\n')
 215                                         return false;  // We can't handle a new line and the attendant indentation yet
 216
 217                                 if (codePoint == '\'')
 218                                         out << "''";
 219                                 else
 220                                         WriteCodePoint(out, codePoint);
 221                         }
 222                         out << "'";
 223                         return true;
 224                 }
 225
 226                 bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii)
 227                 {
 228                         out << "\"";
 229                         int codePoint;
 230                         for(std::string::const_iterator i = str.begin();
 231                                 GetNextCodePointAndAdvance(codePoint, i, str.end());
 232                                 )
 233                         {
 234                                 if (codePoint == '\"')
 235                                         out << "\\\"";
 236                                 else if (codePoint == '\\')
 237                                         out << "\\\\";
 238                                 else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0)) // Control characters and non-breaking space
 239                                         WriteDoubleQuoteEscapeSequence(out, codePoint);
 240                                 else if (codePoint == 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2)
 241                                         WriteDoubleQuoteEscapeSequence(out, codePoint);
 242                                 else if (escapeNonAscii && codePoint > 0x7E)
 243                                         WriteDoubleQuoteEscapeSequence(out, codePoint);
 244                                 else
 245                                         WriteCodePoint(out, codePoint);
 246                         }
 247                         out << "\"";
 248                         return true;
 249                 }
 250
 251                 bool WriteLiteralString(ostream& out, const std::string& str, int indent)
 252                 {
 253                         out << "|\n";
 254                         out << IndentTo(indent);
 255                         int codePoint;
 256                         for(std::string::const_iterator i = str.begin();
 257                                 GetNextCodePointAndAdvance(codePoint, i, str.end());
 258                                 )
 259                         {
 260                                 if (codePoint == '\n')
 261                                   out << "\n" << IndentTo(indent);
 262                                 else
 263                                   WriteCodePoint(out, codePoint);
 264                         }
 265                         return true;
 266                 }
 267
 268                 bool WriteComment(ostream& out, const std::string& str, int postCommentIndent)
 269                 {
 270                         unsigned curIndent = out.col();
 271                         out << "#" << Indentation(postCommentIndent);
 272                         int codePoint;
 273                         for(std::string::const_iterator i = str.begin();
 274                                 GetNextCodePointAndAdvance(codePoint, i, str.end());
 275                                 )
 276                         {
 277                                 if(codePoint == '\n')
 278                                         out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent);
 279                                 else
 280                                         WriteCodePoint(out, codePoint);
 281                         }
 282                         return true;
 283                 }
 284
 285                 bool WriteAlias(ostream& out, const std::string& str)
 286                 {
 287                         out << "*";
 288                         return WriteAliasName(out, str);
 289                 }
 290
 291                 bool WriteAnchor(ostream& out, const std::string& str)
 292                 {
 293                         out << "&";
 294                         return WriteAliasName(out, str);
 295                 }
 296
 297                 bool WriteTag(ostream& out, const std::string& str, bool verbatim)
 298                 {
 299                         out << (verbatim ? "!<" : "!");
 300                         StringCharSource buffer(str.c_str(), str.size());
 301                         const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
 302                         while(buffer) {
 303                                 int n = reValid.Match(buffer);
 304                                 if(n <= 0)
 305                                         return false;
 306
 307                                 while(--n >= 0) {
 308                                         out << buffer[0];
 309                                         ++buffer;
 310                                 }
 311                         }
 312                         if (verbatim)
 313                                 out << ">";
 314                         return true;
 315                 }
 316
 317                 bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag)
 318                 {
 319                         out << "!";
 320                         StringCharSource prefixBuffer(prefix.c_str(), prefix.size());
 321                         while(prefixBuffer) {
 322                                 int n = Exp::URI().Match(prefixBuffer);
 323                                 if(n <= 0)
 324                                         return false;
 325
 326                                 while(--n >= 0) {
 327                                         out << prefixBuffer[0];
 328                                         ++prefixBuffer;
 329                                 }
 330                         }
 331
 332                         out << "!";
 333                         StringCharSource tagBuffer(tag.c_str(), tag.size());
 334                         while(tagBuffer) {
 335                                 int n = Exp::Tag().Match(tagBuffer);
 336                                 if(n <= 0)
 337                                         return false;
 338
 339                                 while(--n >= 0) {
 340                                         out << tagBuffer[0];
 341                                         ++tagBuffer;
 342                                 }
 343                         }
 344                         return true;
 345                 }
 346
 // Writes `size` bytes starting at `data` to `out` as a double-quoted,
 // '='-padded base64 string.
 //
 // out:  destination stream.
 // data: first byte to encode; not dereferenced when size is 0.
 // size: number of bytes to encode.
 // Always returns true.
 bool WriteBinary(ostream& out, const char *data, std::size_t size)
 {
     static const char encoding[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
     const char PAD = '=';

     // Read the input as unsigned bytes: with a signed char, any byte >= 0x80
     // would shift to a negative value and index outside the table.
     const unsigned char *bytes = reinterpret_cast<const unsigned char *>(data);

     out << "\"";
     const std::size_t chunks = size / 3;
     const std::size_t remainder = size % 3;

     // every full group of 3 input bytes becomes 4 output characters
     for(std::size_t i = 0; i < chunks; ++i, bytes += 3) {
         out << encoding[bytes[0] >> 2];
         out << encoding[((bytes[0] & 0x3) << 4) | (bytes[1] >> 4)];
         out << encoding[((bytes[1] & 0xf) << 2) | (bytes[2] >> 6)];
         out << encoding[bytes[2] & 0x3f];
     }

     // a trailing group of 1 or 2 bytes is padded out to 4 characters
     switch(remainder) {
         case 0:
             break;
         case 1:
             out << encoding[bytes[0] >> 2];
             out << encoding[((bytes[0] & 0x3) << 4)];
             out << PAD;
             out << PAD;
             break;
         case 2:
             out << encoding[bytes[0] >> 2];
             out << encoding[((bytes[0] & 0x3) << 4) | (bytes[1] >> 4)];
             out << encoding[((bytes[1] & 0xf) << 2)];
             out << PAD;
             break;
     }

     out << "\"";
     return true;
 }
 383         }
 384 }
 385