llvm/lib/Support/JSON.cpp

   1 //=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===---------------------------------------------------------------------===//
   8
   9 #include "llvm/Support/JSON.h"
  10 #include "llvm/ADT/STLExtras.h"
  11 #include "llvm/ADT/StringExtras.h"
  12 #include "llvm/Support/ConvertUTF.h"
  13 #include "llvm/Support/Error.h"
  14 #include "llvm/Support/Format.h"
  15 #include "llvm/Support/NativeFormatting.h"
  16 #include "llvm/Support/raw_ostream.h"
  17 #include <cctype>
  18 #include <cerrno>
  19 #include <optional>
  20
  21 namespace llvm {
  22 namespace json {
  23
  24 Value &Object::operator[](const ObjectKey &K) {
  25   return try_emplace(K, nullptr).first->getSecond();
  26 }
  27 Value &Object::operator[](ObjectKey &&K) {
  28   return try_emplace(std::move(K), nullptr).first->getSecond();
  29 }
  30 Value *Object::get(StringRef K) {
  31   auto I = find(K);
  32   if (I == end())
  33     return nullptr;
  34   return &I->second;
  35 }
  36 const Value *Object::get(StringRef K) const {
  37   auto I = find(K);
  38   if (I == end())
  39     return nullptr;
  40   return &I->second;
  41 }
  42 std::optional<std::nullptr_t> Object::getNull(StringRef K) const {
  43   if (auto *V = get(K))
  44     return V->getAsNull();
  45   return std::nullopt;
  46 }
  47 std::optional<bool> Object::getBoolean(StringRef K) const {
  48   if (auto *V = get(K))
  49     return V->getAsBoolean();
  50   return std::nullopt;
  51 }
  52 std::optional<double> Object::getNumber(StringRef K) const {
  53   if (auto *V = get(K))
  54     return V->getAsNumber();
  55   return std::nullopt;
  56 }
  57 std::optional<int64_t> Object::getInteger(StringRef K) const {
  58   if (auto *V = get(K))
  59     return V->getAsInteger();
  60   return std::nullopt;
  61 }
  62 std::optional<llvm::StringRef> Object::getString(StringRef K) const {
  63   if (auto *V = get(K))
  64     return V->getAsString();
  65   return std::nullopt;
  66 }
  67 const json::Object *Object::getObject(StringRef K) const {
  68   if (auto *V = get(K))
  69     return V->getAsObject();
  70   return nullptr;
  71 }
  72 json::Object *Object::getObject(StringRef K) {
  73   if (auto *V = get(K))
  74     return V->getAsObject();
  75   return nullptr;
  76 }
  77 const json::Array *Object::getArray(StringRef K) const {
  78   if (auto *V = get(K))
  79     return V->getAsArray();
  80   return nullptr;
  81 }
  82 json::Array *Object::getArray(StringRef K) {
  83   if (auto *V = get(K))
  84     return V->getAsArray();
  85   return nullptr;
  86 }
  87 bool operator==(const Object &LHS, const Object &RHS) {
  88   if (LHS.size() != RHS.size())
  89     return false;
  90   for (const auto &L : LHS) {
  91     auto R = RHS.find(L.first);
  92     if (R == RHS.end() || L.second != R->second)
  93       return false;
  94   }
  95   return true;
  96 }
  97
  98 Array::Array(std::initializer_list<Value> Elements) {
  99   V.reserve(Elements.size());
 100   for (const Value &V : Elements) {
 101     emplace_back(nullptr);
 102     back().moveFrom(std::move(V));
 103   }
 104 }
 105
// Constructs an array-valued Value from an initializer list by delegating to
// the constructor that takes a json::Array built from the same elements.
Value::Value(std::initializer_list<Value> Elements)
    : Value(json::Array(Elements)) {}
 108
 109 void Value::copyFrom(const Value &M) {
 110   Type = M.Type;
 111   switch (Type) {
 112   case T_Null:
 113   case T_Boolean:
 114   case T_Double:
 115   case T_Integer:
 116   case T_UINT64:
 117     memcpy(&Union, &M.Union, sizeof(Union));
 118     break;
 119   case T_StringRef:
 120     create<StringRef>(M.as<StringRef>());
 121     break;
 122   case T_String:
 123     create<std::string>(M.as<std::string>());
 124     break;
 125   case T_Object:
 126     create<json::Object>(M.as<json::Object>());
 127     break;
 128   case T_Array:
 129     create<json::Array>(M.as<json::Array>());
 130     break;
 131   }
 132 }
 133
 134 void Value::moveFrom(const Value &&M) {
 135   Type = M.Type;
 136   switch (Type) {
 137   case T_Null:
 138   case T_Boolean:
 139   case T_Double:
 140   case T_Integer:
 141   case T_UINT64:
 142     memcpy(&Union, &M.Union, sizeof(Union));
 143     break;
 144   case T_StringRef:
 145     create<StringRef>(M.as<StringRef>());
 146     break;
 147   case T_String:
 148     create<std::string>(std::move(M.as<std::string>()));
 149     M.Type = T_Null;
 150     break;
 151   case T_Object:
 152     create<json::Object>(std::move(M.as<json::Object>()));
 153     M.Type = T_Null;
 154     break;
 155   case T_Array:
 156     create<json::Array>(std::move(M.as<json::Array>()));
 157     M.Type = T_Null;
 158     break;
 159   }
 160 }
 161
 162 void Value::destroy() {
 163   switch (Type) {
 164   case T_Null:
 165   case T_Boolean:
 166   case T_Double:
 167   case T_Integer:
 168   case T_UINT64:
 169     break;
 170   case T_StringRef:
 171     as<StringRef>().~StringRef();
 172     break;
 173   case T_String:
 174     as<std::string>().~basic_string();
 175     break;
 176   case T_Object:
 177     as<json::Object>().~Object();
 178     break;
 179   case T_Array:
 180     as<json::Array>().~Array();
 181     break;
 182   }
 183 }
 184
 185 bool operator==(const Value &L, const Value &R) {
 186   if (L.kind() != R.kind())
 187     return false;
 188   switch (L.kind()) {
 189   case Value::Null:
 190     return *L.getAsNull() == *R.getAsNull();
 191   case Value::Boolean:
 192     return *L.getAsBoolean() == *R.getAsBoolean();
 193   case Value::Number:
 194     // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
 195     // The same integer must convert to the same double, per the standard.
 196     // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
 197     // So we avoid floating point promotion for exact comparisons.
 198     if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
 199       return L.getAsInteger() == R.getAsInteger();
 200     return *L.getAsNumber() == *R.getAsNumber();
 201   case Value::String:
 202     return *L.getAsString() == *R.getAsString();
 203   case Value::Array:
 204     return *L.getAsArray() == *R.getAsArray();
 205   case Value::Object:
 206     return *L.getAsObject() == *R.getAsObject();
 207   }
 208   llvm_unreachable("Unknown value kind");
 209 }
 210
 211 void Path::report(llvm::StringLiteral Msg) {
 212   // Walk up to the root context, and count the number of segments.
 213   unsigned Count = 0;
 214   const Path *P;
 215   for (P = this; P->Parent != nullptr; P = P->Parent)
 216     ++Count;
 217   Path::Root *R = P->Seg.root();
 218   // Fill in the error message and copy the path (in reverse order).
 219   R->ErrorMessage = Msg;
 220   R->ErrorPath.resize(Count);
 221   auto It = R->ErrorPath.begin();
 222   for (P = this; P->Parent != nullptr; P = P->Parent)
 223     *It++ = P->Seg;
 224 }
 225
 226 Error Path::Root::getError() const {
 227   std::string S;
 228   raw_string_ostream OS(S);
 229   OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
 230   if (ErrorPath.empty()) {
 231     if (!Name.empty())
 232       OS << " when parsing " << Name;
 233   } else {
 234     OS << " at " << (Name.empty() ? "(root)" : Name);
 235     for (const Path::Segment &S : llvm::reverse(ErrorPath)) {
 236       if (S.isField())
 237         OS << '.' << S.field();
 238       else
 239         OS << '[' << S.index() << ']';
 240     }
 241   }
 242   return createStringError(llvm::inconvertibleErrorCode(), S);
 243 }
 244
 245 std::vector<const Object::value_type *> sortedElements(const Object &O) {
 246   std::vector<const Object::value_type *> Elements;
 247   for (const auto &E : O)
 248     Elements.push_back(&E);
 249   llvm::sort(Elements,
 250              [](const Object::value_type *L, const Object::value_type *R) {
 251                return L->first < R->first;
 252              });
 253   return Elements;
 254 }
 255
 256 // Prints a one-line version of a value that isn't our main focus.
 257 // We interleave writes to OS and JOS, exploiting the lack of extra buffering.
 258 // This is OK as we own the implementation.
 259 static void abbreviate(const Value &V, OStream &JOS) {
 260   switch (V.kind()) {
 261   case Value::Array:
 262     JOS.rawValue(V.getAsArray()->empty() ? "[]" : "[ ... ]");
 263     break;
 264   case Value::Object:
 265     JOS.rawValue(V.getAsObject()->empty() ? "{}" : "{ ... }");
 266     break;
 267   case Value::String: {
 268     llvm::StringRef S = *V.getAsString();
 269     if (S.size() < 40) {
 270       JOS.value(V);
 271     } else {
 272       std::string Truncated = fixUTF8(S.take_front(37));
 273       Truncated.append("...");
 274       JOS.value(Truncated);
 275     }
 276     break;
 277   }
 278   default:
 279     JOS.value(V);
 280   }
 281 }
 282
 283 // Prints a semi-expanded version of a value that is our main focus.
 284 // Array/Object entries are printed, but not recursively as they may be huge.
 285 static void abbreviateChildren(const Value &V, OStream &JOS) {
 286   switch (V.kind()) {
 287   case Value::Array:
 288     JOS.array([&] {
 289       for (const auto &I : *V.getAsArray())
 290         abbreviate(I, JOS);
 291     });
 292     break;
 293   case Value::Object:
 294     JOS.object([&] {
 295       for (const auto *KV : sortedElements(*V.getAsObject())) {
 296         JOS.attributeBegin(KV->first);
 297         abbreviate(KV->second, JOS);
 298         JOS.attributeEnd();
 299       }
 300     });
 301     break;
 302   default:
 303     JOS.value(V);
 304   }
 305 }
 306
 307 void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const {
 308   OStream JOS(OS, /*IndentSize=*/2);
 309   // PrintValue recurses down the path, printing the ancestors of our target.
 310   // Siblings of nodes along the path are printed with abbreviate(), and the
 311   // target itself is printed with the somewhat richer abbreviateChildren().
 312   // 'Recurse' is the lambda itself, to allow recursive calls.
 313   auto PrintValue = [&](const Value &V, ArrayRef<Segment> Path, auto &Recurse) {
 314     // Print the target node itself, with the error as a comment.
 315     // Also used if we can't follow our path, e.g. it names a field that
 316     // *should* exist but doesn't.
 317     auto HighlightCurrent = [&] {
 318       std::string Comment = "error: ";
 319       Comment.append(ErrorMessage.data(), ErrorMessage.size());
 320       JOS.comment(Comment);
 321       abbreviateChildren(V, JOS);
 322     };
 323     if (Path.empty()) // We reached our target.
 324       return HighlightCurrent();
 325     const Segment &S = Path.back(); // Path is in reverse order.
 326     if (S.isField()) {
 327       // Current node is an object, path names a field.
 328       llvm::StringRef FieldName = S.field();
 329       const Object *O = V.getAsObject();
 330       if (!O || !O->get(FieldName))
 331         return HighlightCurrent();
 332       JOS.object([&] {
 333         for (const auto *KV : sortedElements(*O)) {
 334           JOS.attributeBegin(KV->first);
 335           if (FieldName == StringRef(KV->first))
 336             Recurse(KV->second, Path.drop_back(), Recurse);
 337           else
 338             abbreviate(KV->second, JOS);
 339           JOS.attributeEnd();
 340         }
 341       });
 342     } else {
 343       // Current node is an array, path names an element.
 344       const Array *A = V.getAsArray();
 345       if (!A || S.index() >= A->size())
 346         return HighlightCurrent();
 347       JOS.array([&] {
 348         unsigned Current = 0;
 349         for (const auto &V : *A) {
 350           if (Current++ == S.index())
 351             Recurse(V, Path.drop_back(), Recurse);
 352           else
 353             abbreviate(V, JOS);
 354         }
 355       });
 356     }
 357   };
 358   PrintValue(R, ErrorPath, PrintValue);
 359 }
 360
 361 namespace {
 362 // Simple recursive-descent JSON parser.
 363 class Parser {
 364 public:
 365   Parser(StringRef JSON)
 366       : Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
 367
 368   bool checkUTF8() {
 369     size_t ErrOffset;
 370     if (isUTF8(StringRef(Start, End - Start), &ErrOffset))
 371       return true;
 372     P = Start + ErrOffset; // For line/column calculation.
 373     return parseError("Invalid UTF-8 sequence");
 374   }
 375
 376   bool parseValue(Value &Out);
 377
 378   bool assertEnd() {
 379     eatWhitespace();
 380     if (P == End)
 381       return true;
 382     return parseError("Text after end of document");
 383   }
 384
 385   Error takeError() {
 386     assert(Err);
 387     return std::move(*Err);
 388   }
 389
 390 private:
 391   void eatWhitespace() {
 392     while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t'))
 393       ++P;
 394   }
 395
 396   // On invalid syntax, parseX() functions return false and set Err.
 397   bool parseNumber(char First, Value &Out);
 398   bool parseString(std::string &Out);
 399   bool parseUnicode(std::string &Out);
 400   bool parseError(const char *Msg); // always returns false
 401
 402   char next() { return P == End ? 0 : *P++; }
 403   char peek() { return P == End ? 0 : *P; }
 404   static bool isNumber(char C) {
 405     return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' ||
 406            C == '5' || C == '6' || C == '7' || C == '8' || C == '9' ||
 407            C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.';
 408   }
 409
 410   std::optional<Error> Err;
 411   const char *Start, *P, *End;
 412 };
 413 } // namespace
 414
 415 bool Parser::parseValue(Value &Out) {
 416   eatWhitespace();
 417   if (P == End)
 418     return parseError("Unexpected EOF");
 419   switch (char C = next()) {
 420   // Bare null/true/false are easy - first char identifies them.
 421   case 'n':
 422     Out = nullptr;
 423     return (next() == 'u' && next() == 'l' && next() == 'l') ||
 424            parseError("Invalid JSON value (null?)");
 425   case 't':
 426     Out = true;
 427     return (next() == 'r' && next() == 'u' && next() == 'e') ||
 428            parseError("Invalid JSON value (true?)");
 429   case 'f':
 430     Out = false;
 431     return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') ||
 432            parseError("Invalid JSON value (false?)");
 433   case '"': {
 434     std::string S;
 435     if (parseString(S)) {
 436       Out = std::move(S);
 437       return true;
 438     }
 439     return false;
 440   }
 441   case '[': {
 442     Out = Array{};
 443     Array &A = *Out.getAsArray();
 444     eatWhitespace();
 445     if (peek() == ']') {
 446       ++P;
 447       return true;
 448     }
 449     for (;;) {
 450       A.emplace_back(nullptr);
 451       if (!parseValue(A.back()))
 452         return false;
 453       eatWhitespace();
 454       switch (next()) {
 455       case ',':
 456         eatWhitespace();
 457         continue;
 458       case ']':
 459         return true;
 460       default:
 461         return parseError("Expected , or ] after array element");
 462       }
 463     }
 464   }
 465   case '{': {
 466     Out = Object{};
 467     Object &O = *Out.getAsObject();
 468     eatWhitespace();
 469     if (peek() == '}') {
 470       ++P;
 471       return true;
 472     }
 473     for (;;) {
 474       if (next() != '"')
 475         return parseError("Expected object key");
 476       std::string K;
 477       if (!parseString(K))
 478         return false;
 479       eatWhitespace();
 480       if (next() != ':')
 481         return parseError("Expected : after object key");
 482       eatWhitespace();
 483       if (!parseValue(O[std::move(K)]))
 484         return false;
 485       eatWhitespace();
 486       switch (next()) {
 487       case ',':
 488         eatWhitespace();
 489         continue;
 490       case '}':
 491         return true;
 492       default:
 493         return parseError("Expected , or } after object property");
 494       }
 495     }
 496   }
 497   default:
 498     if (isNumber(C))
 499       return parseNumber(C, Out);
 500     return parseError("Invalid JSON value");
 501   }
 502 }
 503
 504 bool Parser::parseNumber(char First, Value &Out) {
 505   // Read the number into a string. (Must be null-terminated for strto*).
 506   SmallString<24> S;
 507   S.push_back(First);
 508   while (isNumber(peek()))
 509     S.push_back(next());
 510   char *End;
 511   // Try first to parse as integer, and if so preserve full 64 bits.
 512   // We check for errno for out of bounds errors and for End == S.end()
 513   // to make sure that the numeric string is not malformed.
 514   errno = 0;
 515   int64_t I = std::strtoll(S.c_str(), &End, 10);
 516   if (End == S.end() && errno != ERANGE) {
 517     Out = int64_t(I);
 518     return true;
 519   }
 520   // strtroull has a special handling for negative numbers, but in this
 521   // case we don't want to do that because negative numbers were already
 522   // handled in the previous block.
 523   if (First != '-') {
 524     errno = 0;
 525     uint64_t UI = std::strtoull(S.c_str(), &End, 10);
 526     if (End == S.end() && errno != ERANGE) {
 527       Out = UI;
 528       return true;
 529     }
 530   }
 531   // If it's not an integer
 532   Out = std::strtod(S.c_str(), &End);
 533   return End == S.end() || parseError("Invalid JSON value (number?)");
 534 }
 535
 536 bool Parser::parseString(std::string &Out) {
 537   // leading quote was already consumed.
 538   for (char C = next(); C != '"'; C = next()) {
 539     if (LLVM_UNLIKELY(P == End))
 540       return parseError("Unterminated string");
 541     if (LLVM_UNLIKELY((C & 0x1f) == C))
 542       return parseError("Control character in string");
 543     if (LLVM_LIKELY(C != '\\')) {
 544       Out.push_back(C);
 545       continue;
 546     }
 547     // Handle escape sequence.
 548     switch (C = next()) {
 549     case '"':
 550     case '\\':
 551     case '/':
 552       Out.push_back(C);
 553       break;
 554     case 'b':
 555       Out.push_back('\b');
 556       break;
 557     case 'f':
 558       Out.push_back('\f');
 559       break;
 560     case 'n':
 561       Out.push_back('\n');
 562       break;
 563     case 'r':
 564       Out.push_back('\r');
 565       break;
 566     case 't':
 567       Out.push_back('\t');
 568       break;
 569     case 'u':
 570       if (!parseUnicode(Out))
 571         return false;
 572       break;
 573     default:
 574       return parseError("Invalid escape sequence");
 575     }
 576   }
 577   return true;
 578 }
 579
 580 static void encodeUtf8(uint32_t Rune, std::string &Out) {
 581   if (Rune < 0x80) {
 582     Out.push_back(Rune & 0x7F);
 583   } else if (Rune < 0x800) {
 584     uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6);
 585     uint8_t SecondByte = 0x80 | (Rune & 0x3F);
 586     Out.push_back(FirstByte);
 587     Out.push_back(SecondByte);
 588   } else if (Rune < 0x10000) {
 589     uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12);
 590     uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6);
 591     uint8_t ThirdByte = 0x80 | (Rune & 0x3F);
 592     Out.push_back(FirstByte);
 593     Out.push_back(SecondByte);
 594     Out.push_back(ThirdByte);
 595   } else if (Rune < 0x110000) {
 596     uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18);
 597     uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12);
 598     uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6);
 599     uint8_t FourthByte = 0x80 | (Rune & 0x3F);
 600     Out.push_back(FirstByte);
 601     Out.push_back(SecondByte);
 602     Out.push_back(ThirdByte);
 603     Out.push_back(FourthByte);
 604   } else {
 605     llvm_unreachable("Invalid codepoint");
 606   }
 607 }
 608
 609 // Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
 610 // May parse several sequential escapes to ensure proper surrogate handling.
 611 // We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
 612 // These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
 613 bool Parser::parseUnicode(std::string &Out) {
 614   // Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
 615   auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); };
 616   // Decodes 4 hex digits from the stream into Out, returns false on error.
 617   auto Parse4Hex = [this](uint16_t &Out) -> bool {
 618     Out = 0;
 619     char Bytes[] = {next(), next(), next(), next()};
 620     for (unsigned char C : Bytes) {
 621       if (!std::isxdigit(C))
 622         return parseError("Invalid \\u escape sequence");
 623       Out <<= 4;
 624       Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0');
 625     }
 626     return true;
 627   };
 628   uint16_t First; // UTF-16 code unit from the first \u escape.
 629   if (!Parse4Hex(First))
 630     return false;
 631
 632   // We loop to allow proper surrogate-pair error handling.
 633   while (true) {
 634     // Case 1: the UTF-16 code unit is already a codepoint in the BMP.
 635     if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) {
 636       encodeUtf8(First, Out);
 637       return true;
 638     }
 639
 640     // Case 2: it's an (unpaired) trailing surrogate.
 641     if (LLVM_UNLIKELY(First >= 0xDC00)) {
 642       Invalid();
 643       return true;
 644     }
 645
 646     // Case 3: it's a leading surrogate. We expect a trailing one next.
 647     // Case 3a: there's no trailing \u escape. Don't advance in the stream.
 648     if (LLVM_UNLIKELY(P + 2 > End || *P != '\\' || *(P + 1) != 'u')) {
 649       Invalid(); // Leading surrogate was unpaired.
 650       return true;
 651     }
 652     P += 2;
 653     uint16_t Second;
 654     if (!Parse4Hex(Second))
 655       return false;
 656     // Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
 657     if (LLVM_UNLIKELY(Second < 0xDC00 || Second >= 0xE000)) {
 658       Invalid();      // Leading surrogate was unpaired.
 659       First = Second; // Second escape still needs to be processed.
 660       continue;
 661     }
 662     // Case 3c: a valid surrogate pair encoding an astral codepoint.
 663     encodeUtf8(0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00), Out);
 664     return true;
 665   }
 666 }
 667
 668 bool Parser::parseError(const char *Msg) {
 669   int Line = 1;
 670   const char *StartOfLine = Start;
 671   for (const char *X = Start; X < P; ++X) {
 672     if (*X == 0x0A) {
 673       ++Line;
 674       StartOfLine = X + 1;
 675     }
 676   }
 677   Err.emplace(
 678       std::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));
 679   return false;
 680 }
 681
 682 Expected<Value> parse(StringRef JSON) {
 683   Parser P(JSON);
 684   Value E = nullptr;
 685   if (P.checkUTF8())
 686     if (P.parseValue(E))
 687       if (P.assertEnd())
 688         return std::move(E);
 689   return P.takeError();
 690 }
// Out-of-line definition of the static member ParseError::ID.
// NOTE(review): presumably the class identifier used by LLVM's ErrorInfo
// machinery (only its address matters) - confirm against JSON.h.
char ParseError::ID = 0;
 692
 693 bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
 694   // Fast-path for ASCII, which is valid UTF-8.
 695   if (LLVM_LIKELY(isASCII(S)))
 696     return true;
 697
 698   const UTF8 *Data = reinterpret_cast<const UTF8 *>(S.data()), *Rest = Data;
 699   if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
 700     return true;
 701
 702   if (ErrOffset)
 703     *ErrOffset = Rest - Data;
 704   return false;
 705 }
 706
 707 std::string fixUTF8(llvm::StringRef S) {
 708   // This isn't particularly efficient, but is only for error-recovery.
 709   std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
 710   const UTF8 *In8 = reinterpret_cast<const UTF8 *>(S.data());
 711   UTF32 *Out32 = Codepoints.data();
 712   ConvertUTF8toUTF32(&In8, In8 + S.size(), &Out32, Out32 + Codepoints.size(),
 713                      lenientConversion);
 714   Codepoints.resize(Out32 - Codepoints.data());
 715   std::string Res(4 * Codepoints.size(), 0); // 4 bytes per codepoint suffice
 716   const UTF32 *In32 = Codepoints.data();
 717   UTF8 *Out8 = reinterpret_cast<UTF8 *>(&Res[0]);
 718   ConvertUTF32toUTF8(&In32, In32 + Codepoints.size(), &Out8, Out8 + Res.size(),
 719                      strictConversion);
 720   Res.resize(reinterpret_cast<char *>(Out8) - Res.data());
 721   return Res;
 722 }
 723
 724 static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
 725   OS << '\"';
 726   for (unsigned char C : S) {
 727     if (C == 0x22 || C == 0x5C)
 728       OS << '\\';
 729     if (C >= 0x20) {
 730       OS << C;
 731       continue;
 732     }
 733     OS << '\\';
 734     switch (C) {
 735     // A few characters are common enough to make short escapes worthwhile.
 736     case '\t':
 737       OS << 't';
 738       break;
 739     case '\n':
 740       OS << 'n';
 741       break;
 742     case '\r':
 743       OS << 'r';
 744       break;
 745     default:
 746       OS << 'u';
 747       llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4);
 748       break;
 749     }
 750   }
 751   OS << '\"';
 752 }
 753
 754 void llvm::json::OStream::value(const Value &V) {
 755   switch (V.kind()) {
 756   case Value::Null:
 757     valueBegin();
 758     OS << "null";
 759     return;
 760   case Value::Boolean:
 761     valueBegin();
 762     OS << (*V.getAsBoolean() ? "true" : "false");
 763     return;
 764   case Value::Number:
 765     valueBegin();
 766     if (V.Type == Value::T_Integer)
 767       OS << *V.getAsInteger();
 768     else if (V.Type == Value::T_UINT64)
 769       OS << *V.getAsUINT64();
 770     else
 771       OS << format("%.*g", std::numeric_limits<double>::max_digits10,
 772                    *V.getAsNumber());
 773     return;
 774   case Value::String:
 775     valueBegin();
 776     quote(OS, *V.getAsString());
 777     return;
 778   case Value::Array:
 779     return array([&] {
 780       for (const Value &E : *V.getAsArray())
 781         value(E);
 782     });
 783   case Value::Object:
 784     return object([&] {
 785       for (const Object::value_type *E : sortedElements(*V.getAsObject()))
 786         attribute(E->first, E->second);
 787     });
 788   }
 789 }
 790
 791 void llvm::json::OStream::valueBegin() {
 792   assert(Stack.back().Ctx != Object && "Only attributes allowed here");
 793   if (Stack.back().HasValue) {
 794     assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
 795     OS << ',';
 796   }
 797   if (Stack.back().Ctx == Array)
 798     newline();
 799   flushComment();
 800   Stack.back().HasValue = true;
 801 }
 802
// Stores Comment as the pending comment; flushComment() writes it out later.
// Only one comment may be pending at a time (asserted).
void OStream::comment(llvm::StringRef Comment) {
  assert(PendingComment.empty() && "Only one comment per value!");
  PendingComment = Comment;
}
 807
 808 void OStream::flushComment() {
 809   if (PendingComment.empty())
 810     return;
 811   OS << (IndentSize ? "/* " : "/*");
 812   // Be sure not to accidentally emit "*/". Transform to "* /".
 813   while (!PendingComment.empty()) {
 814     auto Pos = PendingComment.find("*/");
 815     if (Pos == StringRef::npos) {
 816       OS << PendingComment;
 817       PendingComment = "";
 818     } else {
 819       OS << PendingComment.take_front(Pos) << "* /";
 820       PendingComment = PendingComment.drop_front(Pos + 2);
 821     }
 822   }
 823   OS << (IndentSize ? " */" : "*/");
 824   // Comments are on their own line unless attached to an attribute value.
 825   if (Stack.size() > 1 && Stack.back().Ctx == Singleton) {
 826     if (IndentSize)
 827       OS << ' ';
 828   } else {
 829     newline();
 830   }
 831 }
 832
 833 void llvm::json::OStream::newline() {
 834   if (IndentSize) {
 835     OS.write('\n');
 836     OS.indent(Indent);
 837   }
 838 }
 839
 840 void llvm::json::OStream::arrayBegin() {
 841   valueBegin();
 842   Stack.emplace_back();
 843   Stack.back().Ctx = Array;
 844   Indent += IndentSize;
 845   OS << '[';
 846 }
 847
 848 void llvm::json::OStream::arrayEnd() {
 849   assert(Stack.back().Ctx == Array);
 850   Indent -= IndentSize;
 851   if (Stack.back().HasValue)
 852     newline();
 853   OS << ']';
 854   assert(PendingComment.empty());
 855   Stack.pop_back();
 856   assert(!Stack.empty());
 857 }
 858
 859 void llvm::json::OStream::objectBegin() {
 860   valueBegin();
 861   Stack.emplace_back();
 862   Stack.back().Ctx = Object;
 863   Indent += IndentSize;
 864   OS << '{';
 865 }
 866
 867 void llvm::json::OStream::objectEnd() {
 868   assert(Stack.back().Ctx == Object);
 869   Indent -= IndentSize;
 870   if (Stack.back().HasValue)
 871     newline();
 872   OS << '}';
 873   assert(PendingComment.empty());
 874   Stack.pop_back();
 875   assert(!Stack.empty());
 876 }
 877
 878 void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
 879   assert(Stack.back().Ctx == Object);
 880   if (Stack.back().HasValue)
 881     OS << ',';
 882   newline();
 883   flushComment();
 884   Stack.back().HasValue = true;
 885   Stack.emplace_back();
 886   Stack.back().Ctx = Singleton;
 887   if (LLVM_LIKELY(isUTF8(Key))) {
 888     quote(OS, Key);
 889   } else {
 890     assert(false && "Invalid UTF-8 in attribute key");
 891     quote(OS, fixUTF8(Key));
 892   }
 893   OS.write(':');
 894   if (IndentSize)
 895     OS.write(' ');
 896 }
 897
// Finishes the object member opened by attributeBegin(): checks that exactly
// the member's Singleton context is on top and has received its value, that
// no comment is left pending, and pops back to the enclosing Object context.
void llvm::json::OStream::attributeEnd() {
  assert(Stack.back().Ctx == Singleton);
  assert(Stack.back().HasValue && "Attribute must have a value");
  assert(PendingComment.empty());
  Stack.pop_back();
  assert(Stack.back().Ctx == Object);
}
 905
// Begins a raw value: emits the usual value prologue, pushes a RawValue
// context, and returns the underlying stream for the caller to write to.
// The context is popped again by rawValueEnd().
raw_ostream &llvm::json::OStream::rawValueBegin() {
  valueBegin();
  Stack.emplace_back();
  Stack.back().Ctx = RawValue;
  return OS;
}
 912
// Ends a raw value by popping the RawValue context that rawValueBegin()
// pushed (asserted to be on top of the stack).
void llvm::json::OStream::rawValueEnd() {
  assert(Stack.back().Ctx == RawValue);
  Stack.pop_back();
}
 917
 918 } // namespace json
 919 } // namespace llvm
 920
 921 void llvm::format_provider<llvm::json::Value>::format(
 922     const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
 923   unsigned IndentAmount = 0;
 924   if (!Options.empty() && Options.getAsInteger(/*Radix=*/10, IndentAmount))
 925     llvm_unreachable("json::Value format options should be an integer");
 926   json::OStream(OS, IndentAmount).value(E);
 927 }
 928