llvm/lib/Support/JSON.cpp

   1 //=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===---------------------------------------------------------------------===//
   8
   9 #include "llvm/Support/JSON.h"
  10 #include "llvm/ADT/STLExtras.h"
  11 #include "llvm/ADT/StringExtras.h"
  12 #include "llvm/Support/ConvertUTF.h"
  13 #include "llvm/Support/Error.h"
  14 #include "llvm/Support/Format.h"
  15 #include "llvm/Support/NativeFormatting.h"
  16 #include "llvm/Support/raw_ostream.h"
  17 #include <cctype>
  18 #include <cerrno>
  19 #include <optional>
  20
  21 namespace llvm {
  22 namespace json {
  23
  24 Value &Object::operator[](const ObjectKey &K) {
  25   return try_emplace(K, nullptr).first->getSecond();
  26 }
  27 Value &Object::operator[](ObjectKey &&K) {
  28   return try_emplace(std::move(K), nullptr).first->getSecond();
  29 }
  30 Value *Object::get(StringRef K) {
  31   auto I = find(K);
  32   if (I == end())
  33     return nullptr;
  34   return &I->second;
  35 }
  36 const Value *Object::get(StringRef K) const {
  37   auto I = find(K);
  38   if (I == end())
  39     return nullptr;
  40   return &I->second;
  41 }
  42 std::optional<std::nullptr_t> Object::getNull(StringRef K) const {
  43   if (auto *V = get(K))
  44     return V->getAsNull();
  45   return std::nullopt;
  46 }
  47 std::optional<bool> Object::getBoolean(StringRef K) const {
  48   if (auto *V = get(K))
  49     return V->getAsBoolean();
  50   return std::nullopt;
  51 }
  52 std::optional<double> Object::getNumber(StringRef K) const {
  53   if (auto *V = get(K))
  54     return V->getAsNumber();
  55   return std::nullopt;
  56 }
  57 std::optional<int64_t> Object::getInteger(StringRef K) const {
  58   if (auto *V = get(K))
  59     return V->getAsInteger();
  60   return std::nullopt;
  61 }
  62 std::optional<llvm::StringRef> Object::getString(StringRef K) const {
  63   if (auto *V = get(K))
  64     return V->getAsString();
  65   return std::nullopt;
  66 }
  67 const json::Object *Object::getObject(StringRef K) const {
  68   if (auto *V = get(K))
  69     return V->getAsObject();
  70   return nullptr;
  71 }
  72 json::Object *Object::getObject(StringRef K) {
  73   if (auto *V = get(K))
  74     return V->getAsObject();
  75   return nullptr;
  76 }
  77 const json::Array *Object::getArray(StringRef K) const {
  78   if (auto *V = get(K))
  79     return V->getAsArray();
  80   return nullptr;
  81 }
  82 json::Array *Object::getArray(StringRef K) {
  83   if (auto *V = get(K))
  84     return V->getAsArray();
  85   return nullptr;
  86 }
  87 bool operator==(const Object &LHS, const Object &RHS) {
  88   if (LHS.size() != RHS.size())
  89     return false;
  90   for (const auto &L : LHS) {
  91     auto R = RHS.find(L.first);
  92     if (R == RHS.end() || L.second != R->second)
  93       return false;
  94   }
  95   return true;
  96 }
  97
  98 Array::Array(std::initializer_list<Value> Elements) {
  99   V.reserve(Elements.size());
 100   for (const Value &V : Elements) {
 101     emplace_back(nullptr);
 102     back().moveFrom(std::move(V));
 103   }
 104 }
 105
 106 Value::Value(std::initializer_list<Value> Elements)
 107     : Value(json::Array(Elements)) {}
 108
 109 void Value::copyFrom(const Value &M) {
 110   Type = M.Type;
 111   switch (Type) {
 112   case T_Null:
 113   case T_Boolean:
 114   case T_Double:
 115   case T_Integer:
 116   case T_UINT64:
 117     memcpy(&Union, &M.Union, sizeof(Union));
 118     break;
 119   case T_StringRef:
 120     create<StringRef>(M.as<StringRef>());
 121     break;
 122   case T_String:
 123     create<std::string>(M.as<std::string>());
 124     break;
 125   case T_Object:
 126     create<json::Object>(M.as<json::Object>());
 127     break;
 128   case T_Array:
 129     create<json::Array>(M.as<json::Array>());
 130     break;
 131   }
 132 }
 133
 134 void Value::moveFrom(const Value &&M) {
 135   Type = M.Type;
 136   switch (Type) {
 137   case T_Null:
 138   case T_Boolean:
 139   case T_Double:
 140   case T_Integer:
 141   case T_UINT64:
 142     memcpy(&Union, &M.Union, sizeof(Union));
 143     break;
 144   case T_StringRef:
 145     create<StringRef>(M.as<StringRef>());
 146     break;
 147   case T_String:
 148     create<std::string>(std::move(M.as<std::string>()));
 149     M.Type = T_Null;
 150     break;
 151   case T_Object:
 152     create<json::Object>(std::move(M.as<json::Object>()));
 153     M.Type = T_Null;
 154     break;
 155   case T_Array:
 156     create<json::Array>(std::move(M.as<json::Array>()));
 157     M.Type = T_Null;
 158     break;
 159   }
 160 }
 161
 162 void Value::destroy() {
 163   switch (Type) {
 164   case T_Null:
 165   case T_Boolean:
 166   case T_Double:
 167   case T_Integer:
 168   case T_UINT64:
 169     break;
 170   case T_StringRef:
 171     as<StringRef>().~StringRef();
 172     break;
 173   case T_String:
 174     as<std::string>().~basic_string();
 175     break;
 176   case T_Object:
 177     as<json::Object>().~Object();
 178     break;
 179   case T_Array:
 180     as<json::Array>().~Array();
 181     break;
 182   }
 183 }
 184
 185 bool operator==(const Value &L, const Value &R) {
 186   if (L.kind() != R.kind())
 187     return false;
 188   switch (L.kind()) {
 189   case Value::Null:
 190     return *L.getAsNull() == *R.getAsNull();
 191   case Value::Boolean:
 192     return *L.getAsBoolean() == *R.getAsBoolean();
 193   case Value::Number:
 194     // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
 195     // The same integer must convert to the same double, per the standard.
 196     // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
 197     // So we avoid floating point promotion for exact comparisons.
 198     if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
 199       return L.getAsInteger() == R.getAsInteger();
 200     return *L.getAsNumber() == *R.getAsNumber();
 201   case Value::String:
 202     return *L.getAsString() == *R.getAsString();
 203   case Value::Array:
 204     return *L.getAsArray() == *R.getAsArray();
 205   case Value::Object:
 206     return *L.getAsObject() == *R.getAsObject();
 207   }
 208   llvm_unreachable("Unknown value kind");
 209 }
 210
 211 void Path::report(llvm::StringLiteral Msg) {
 212   // Walk up to the root context, and count the number of segments.
 213   unsigned Count = 0;
 214   const Path *P;
 215   for (P = this; P->Parent != nullptr; P = P->Parent)
 216     ++Count;
 217   Path::Root *R = P->Seg.root();
 218   // Fill in the error message and copy the path (in reverse order).
 219   R->ErrorMessage = Msg;
 220   R->ErrorPath.resize(Count);
 221   auto It = R->ErrorPath.begin();
 222   for (P = this; P->Parent != nullptr; P = P->Parent)
 223     *It++ = P->Seg;
 224 }
 225
 226 Error Path::Root::getError() const {
 227   std::string S;
 228   raw_string_ostream OS(S);
 229   OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
 230   if (ErrorPath.empty()) {
 231     if (!Name.empty())
 232       OS << " when parsing " << Name;
 233   } else {
 234     OS << " at " << (Name.empty() ? "(root)" : Name);
 235     for (const Path::Segment &S : llvm::reverse(ErrorPath)) {
 236       if (S.isField())
 237         OS << '.' << S.field();
 238       else
 239         OS << '[' << S.index() << ']';
 240     }
 241   }
 242   return createStringError(llvm::inconvertibleErrorCode(), OS.str());
 243 }
 244
 245 namespace {
 246
 247 std::vector<const Object::value_type *> sortedElements(const Object &O) {
 248   std::vector<const Object::value_type *> Elements;
 249   for (const auto &E : O)
 250     Elements.push_back(&E);
 251   llvm::sort(Elements,
 252              [](const Object::value_type *L, const Object::value_type *R) {
 253                return L->first < R->first;
 254              });
 255   return Elements;
 256 }
 257
 258 // Prints a one-line version of a value that isn't our main focus.
 259 // We interleave writes to OS and JOS, exploiting the lack of extra buffering.
 260 // This is OK as we own the implementation.
 261 void abbreviate(const Value &V, OStream &JOS) {
 262   switch (V.kind()) {
 263   case Value::Array:
 264     JOS.rawValue(V.getAsArray()->empty() ? "[]" : "[ ... ]");
 265     break;
 266   case Value::Object:
 267     JOS.rawValue(V.getAsObject()->empty() ? "{}" : "{ ... }");
 268     break;
 269   case Value::String: {
 270     llvm::StringRef S = *V.getAsString();
 271     if (S.size() < 40) {
 272       JOS.value(V);
 273     } else {
 274       std::string Truncated = fixUTF8(S.take_front(37));
 275       Truncated.append("...");
 276       JOS.value(Truncated);
 277     }
 278     break;
 279   }
 280   default:
 281     JOS.value(V);
 282   }
 283 }
 284
 285 // Prints a semi-expanded version of a value that is our main focus.
 286 // Array/Object entries are printed, but not recursively as they may be huge.
 287 void abbreviateChildren(const Value &V, OStream &JOS) {
 288   switch (V.kind()) {
 289   case Value::Array:
 290     JOS.array([&] {
 291       for (const auto &I : *V.getAsArray())
 292         abbreviate(I, JOS);
 293     });
 294     break;
 295   case Value::Object:
 296     JOS.object([&] {
 297       for (const auto *KV : sortedElements(*V.getAsObject())) {
 298         JOS.attributeBegin(KV->first);
 299         abbreviate(KV->second, JOS);
 300         JOS.attributeEnd();
 301       }
 302     });
 303     break;
 304   default:
 305     JOS.value(V);
 306   }
 307 }
 308
 309 } // namespace
 310
 311 void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const {
 312   OStream JOS(OS, /*IndentSize=*/2);
 313   // PrintValue recurses down the path, printing the ancestors of our target.
 314   // Siblings of nodes along the path are printed with abbreviate(), and the
 315   // target itself is printed with the somewhat richer abbreviateChildren().
 316   // 'Recurse' is the lambda itself, to allow recursive calls.
 317   auto PrintValue = [&](const Value &V, ArrayRef<Segment> Path, auto &Recurse) {
 318     // Print the target node itself, with the error as a comment.
 319     // Also used if we can't follow our path, e.g. it names a field that
 320     // *should* exist but doesn't.
 321     auto HighlightCurrent = [&] {
 322       std::string Comment = "error: ";
 323       Comment.append(ErrorMessage.data(), ErrorMessage.size());
 324       JOS.comment(Comment);
 325       abbreviateChildren(V, JOS);
 326     };
 327     if (Path.empty()) // We reached our target.
 328       return HighlightCurrent();
 329     const Segment &S = Path.back(); // Path is in reverse order.
 330     if (S.isField()) {
 331       // Current node is an object, path names a field.
 332       llvm::StringRef FieldName = S.field();
 333       const Object *O = V.getAsObject();
 334       if (!O || !O->get(FieldName))
 335         return HighlightCurrent();
 336       JOS.object([&] {
 337         for (const auto *KV : sortedElements(*O)) {
 338           JOS.attributeBegin(KV->first);
 339           if (FieldName.equals(KV->first))
 340             Recurse(KV->second, Path.drop_back(), Recurse);
 341           else
 342             abbreviate(KV->second, JOS);
 343           JOS.attributeEnd();
 344         }
 345       });
 346     } else {
 347       // Current node is an array, path names an element.
 348       const Array *A = V.getAsArray();
 349       if (!A || S.index() >= A->size())
 350         return HighlightCurrent();
 351       JOS.array([&] {
 352         unsigned Current = 0;
 353         for (const auto &V : *A) {
 354           if (Current++ == S.index())
 355             Recurse(V, Path.drop_back(), Recurse);
 356           else
 357             abbreviate(V, JOS);
 358         }
 359       });
 360     }
 361   };
 362   PrintValue(R, ErrorPath, PrintValue);
 363 }
 364
 365 namespace {
 366 // Simple recursive-descent JSON parser.
 367 class Parser {
 368 public:
 369   Parser(StringRef JSON)
 370       : Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
 371
 372   bool checkUTF8() {
 373     size_t ErrOffset;
 374     if (isUTF8(StringRef(Start, End - Start), &ErrOffset))
 375       return true;
 376     P = Start + ErrOffset; // For line/column calculation.
 377     return parseError("Invalid UTF-8 sequence");
 378   }
 379
 380   bool parseValue(Value &Out);
 381
 382   bool assertEnd() {
 383     eatWhitespace();
 384     if (P == End)
 385       return true;
 386     return parseError("Text after end of document");
 387   }
 388
 389   Error takeError() {
 390     assert(Err);
 391     return std::move(*Err);
 392   }
 393
 394 private:
 395   void eatWhitespace() {
 396     while (P != End && (*P == ' ' || *P == '\r' || *P == '\n' || *P == '\t'))
 397       ++P;
 398   }
 399
 400   // On invalid syntax, parseX() functions return false and set Err.
 401   bool parseNumber(char First, Value &Out);
 402   bool parseString(std::string &Out);
 403   bool parseUnicode(std::string &Out);
 404   bool parseError(const char *Msg); // always returns false
 405
 406   char next() { return P == End ? 0 : *P++; }
 407   char peek() { return P == End ? 0 : *P; }
 408   static bool isNumber(char C) {
 409     return C == '0' || C == '1' || C == '2' || C == '3' || C == '4' ||
 410            C == '5' || C == '6' || C == '7' || C == '8' || C == '9' ||
 411            C == 'e' || C == 'E' || C == '+' || C == '-' || C == '.';
 412   }
 413
 414   std::optional<Error> Err;
 415   const char *Start, *P, *End;
 416 };
 417
 418 bool Parser::parseValue(Value &Out) {
 419   eatWhitespace();
 420   if (P == End)
 421     return parseError("Unexpected EOF");
 422   switch (char C = next()) {
 423   // Bare null/true/false are easy - first char identifies them.
 424   case 'n':
 425     Out = nullptr;
 426     return (next() == 'u' && next() == 'l' && next() == 'l') ||
 427            parseError("Invalid JSON value (null?)");
 428   case 't':
 429     Out = true;
 430     return (next() == 'r' && next() == 'u' && next() == 'e') ||
 431            parseError("Invalid JSON value (true?)");
 432   case 'f':
 433     Out = false;
 434     return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') ||
 435            parseError("Invalid JSON value (false?)");
 436   case '"': {
 437     std::string S;
 438     if (parseString(S)) {
 439       Out = std::move(S);
 440       return true;
 441     }
 442     return false;
 443   }
 444   case '[': {
 445     Out = Array{};
 446     Array &A = *Out.getAsArray();
 447     eatWhitespace();
 448     if (peek() == ']') {
 449       ++P;
 450       return true;
 451     }
 452     for (;;) {
 453       A.emplace_back(nullptr);
 454       if (!parseValue(A.back()))
 455         return false;
 456       eatWhitespace();
 457       switch (next()) {
 458       case ',':
 459         eatWhitespace();
 460         continue;
 461       case ']':
 462         return true;
 463       default:
 464         return parseError("Expected , or ] after array element");
 465       }
 466     }
 467   }
 468   case '{': {
 469     Out = Object{};
 470     Object &O = *Out.getAsObject();
 471     eatWhitespace();
 472     if (peek() == '}') {
 473       ++P;
 474       return true;
 475     }
 476     for (;;) {
 477       if (next() != '"')
 478         return parseError("Expected object key");
 479       std::string K;
 480       if (!parseString(K))
 481         return false;
 482       eatWhitespace();
 483       if (next() != ':')
 484         return parseError("Expected : after object key");
 485       eatWhitespace();
 486       if (!parseValue(O[std::move(K)]))
 487         return false;
 488       eatWhitespace();
 489       switch (next()) {
 490       case ',':
 491         eatWhitespace();
 492         continue;
 493       case '}':
 494         return true;
 495       default:
 496         return parseError("Expected , or } after object property");
 497       }
 498     }
 499   }
 500   default:
 501     if (isNumber(C))
 502       return parseNumber(C, Out);
 503     return parseError("Invalid JSON value");
 504   }
 505 }
 506
 507 bool Parser::parseNumber(char First, Value &Out) {
 508   // Read the number into a string. (Must be null-terminated for strto*).
 509   SmallString<24> S;
 510   S.push_back(First);
 511   while (isNumber(peek()))
 512     S.push_back(next());
 513   char *End;
 514   // Try first to parse as integer, and if so preserve full 64 bits.
 515   // We check for errno for out of bounds errors and for End == S.end()
 516   // to make sure that the numeric string is not malformed.
 517   errno = 0;
 518   int64_t I = std::strtoll(S.c_str(), &End, 10);
 519   if (End == S.end() && errno != ERANGE) {
 520     Out = int64_t(I);
 521     return true;
 522   }
 523   // strtroull has a special handling for negative numbers, but in this
 524   // case we don't want to do that because negative numbers were already
 525   // handled in the previous block.
 526   if (First != '-') {
 527     errno = 0;
 528     uint64_t UI = std::strtoull(S.c_str(), &End, 10);
 529     if (End == S.end() && errno != ERANGE) {
 530       Out = UI;
 531       return true;
 532     }
 533   }
 534   // If it's not an integer
 535   Out = std::strtod(S.c_str(), &End);
 536   return End == S.end() || parseError("Invalid JSON value (number?)");
 537 }
 538
 539 bool Parser::parseString(std::string &Out) {
 540   // leading quote was already consumed.
 541   for (char C = next(); C != '"'; C = next()) {
 542     if (LLVM_UNLIKELY(P == End))
 543       return parseError("Unterminated string");
 544     if (LLVM_UNLIKELY((C & 0x1f) == C))
 545       return parseError("Control character in string");
 546     if (LLVM_LIKELY(C != '\\')) {
 547       Out.push_back(C);
 548       continue;
 549     }
 550     // Handle escape sequence.
 551     switch (C = next()) {
 552     case '"':
 553     case '\\':
 554     case '/':
 555       Out.push_back(C);
 556       break;
 557     case 'b':
 558       Out.push_back('\b');
 559       break;
 560     case 'f':
 561       Out.push_back('\f');
 562       break;
 563     case 'n':
 564       Out.push_back('\n');
 565       break;
 566     case 'r':
 567       Out.push_back('\r');
 568       break;
 569     case 't':
 570       Out.push_back('\t');
 571       break;
 572     case 'u':
 573       if (!parseUnicode(Out))
 574         return false;
 575       break;
 576     default:
 577       return parseError("Invalid escape sequence");
 578     }
 579   }
 580   return true;
 581 }
 582
 583 static void encodeUtf8(uint32_t Rune, std::string &Out) {
 584   if (Rune < 0x80) {
 585     Out.push_back(Rune & 0x7F);
 586   } else if (Rune < 0x800) {
 587     uint8_t FirstByte = 0xC0 | ((Rune & 0x7C0) >> 6);
 588     uint8_t SecondByte = 0x80 | (Rune & 0x3F);
 589     Out.push_back(FirstByte);
 590     Out.push_back(SecondByte);
 591   } else if (Rune < 0x10000) {
 592     uint8_t FirstByte = 0xE0 | ((Rune & 0xF000) >> 12);
 593     uint8_t SecondByte = 0x80 | ((Rune & 0xFC0) >> 6);
 594     uint8_t ThirdByte = 0x80 | (Rune & 0x3F);
 595     Out.push_back(FirstByte);
 596     Out.push_back(SecondByte);
 597     Out.push_back(ThirdByte);
 598   } else if (Rune < 0x110000) {
 599     uint8_t FirstByte = 0xF0 | ((Rune & 0x1F0000) >> 18);
 600     uint8_t SecondByte = 0x80 | ((Rune & 0x3F000) >> 12);
 601     uint8_t ThirdByte = 0x80 | ((Rune & 0xFC0) >> 6);
 602     uint8_t FourthByte = 0x80 | (Rune & 0x3F);
 603     Out.push_back(FirstByte);
 604     Out.push_back(SecondByte);
 605     Out.push_back(ThirdByte);
 606     Out.push_back(FourthByte);
 607   } else {
 608     llvm_unreachable("Invalid codepoint");
 609   }
 610 }
 611
 612 // Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
 613 // May parse several sequential escapes to ensure proper surrogate handling.
 614 // We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
 615 // These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
 616 bool Parser::parseUnicode(std::string &Out) {
 617   // Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
 618   auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); };
 619   // Decodes 4 hex digits from the stream into Out, returns false on error.
 620   auto Parse4Hex = [this](uint16_t &Out) -> bool {
 621     Out = 0;
 622     char Bytes[] = {next(), next(), next(), next()};
 623     for (unsigned char C : Bytes) {
 624       if (!std::isxdigit(C))
 625         return parseError("Invalid \\u escape sequence");
 626       Out <<= 4;
 627       Out |= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0');
 628     }
 629     return true;
 630   };
 631   uint16_t First; // UTF-16 code unit from the first \u escape.
 632   if (!Parse4Hex(First))
 633     return false;
 634
 635   // We loop to allow proper surrogate-pair error handling.
 636   while (true) {
 637     // Case 1: the UTF-16 code unit is already a codepoint in the BMP.
 638     if (LLVM_LIKELY(First < 0xD800 || First >= 0xE000)) {
 639       encodeUtf8(First, Out);
 640       return true;
 641     }
 642
 643     // Case 2: it's an (unpaired) trailing surrogate.
 644     if (LLVM_UNLIKELY(First >= 0xDC00)) {
 645       Invalid();
 646       return true;
 647     }
 648
 649     // Case 3: it's a leading surrogate. We expect a trailing one next.
 650     // Case 3a: there's no trailing \u escape. Don't advance in the stream.
 651     if (LLVM_UNLIKELY(P + 2 > End || *P != '\\' || *(P + 1) != 'u')) {
 652       Invalid(); // Leading surrogate was unpaired.
 653       return true;
 654     }
 655     P += 2;
 656     uint16_t Second;
 657     if (!Parse4Hex(Second))
 658       return false;
 659     // Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
 660     if (LLVM_UNLIKELY(Second < 0xDC00 || Second >= 0xE000)) {
 661       Invalid();      // Leading surrogate was unpaired.
 662       First = Second; // Second escape still needs to be processed.
 663       continue;
 664     }
 665     // Case 3c: a valid surrogate pair encoding an astral codepoint.
 666     encodeUtf8(0x10000 | ((First - 0xD800) << 10) | (Second - 0xDC00), Out);
 667     return true;
 668   }
 669 }
 670
 671 bool Parser::parseError(const char *Msg) {
 672   int Line = 1;
 673   const char *StartOfLine = Start;
 674   for (const char *X = Start; X < P; ++X) {
 675     if (*X == 0x0A) {
 676       ++Line;
 677       StartOfLine = X + 1;
 678     }
 679   }
 680   Err.emplace(
 681       std::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));
 682   return false;
 683 }
 684 } // namespace
 685
 686 Expected<Value> parse(StringRef JSON) {
 687   Parser P(JSON);
 688   Value E = nullptr;
 689   if (P.checkUTF8())
 690     if (P.parseValue(E))
 691       if (P.assertEnd())
 692         return std::move(E);
 693   return P.takeError();
 694 }
 695 char ParseError::ID = 0;
 696
 697 bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
 698   // Fast-path for ASCII, which is valid UTF-8.
 699   if (LLVM_LIKELY(isASCII(S)))
 700     return true;
 701
 702   const UTF8 *Data = reinterpret_cast<const UTF8 *>(S.data()), *Rest = Data;
 703   if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
 704     return true;
 705
 706   if (ErrOffset)
 707     *ErrOffset = Rest - Data;
 708   return false;
 709 }
 710
 711 std::string fixUTF8(llvm::StringRef S) {
 712   // This isn't particularly efficient, but is only for error-recovery.
 713   std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
 714   const UTF8 *In8 = reinterpret_cast<const UTF8 *>(S.data());
 715   UTF32 *Out32 = Codepoints.data();
 716   ConvertUTF8toUTF32(&In8, In8 + S.size(), &Out32, Out32 + Codepoints.size(),
 717                      lenientConversion);
 718   Codepoints.resize(Out32 - Codepoints.data());
 719   std::string Res(4 * Codepoints.size(), 0); // 4 bytes per codepoint suffice
 720   const UTF32 *In32 = Codepoints.data();
 721   UTF8 *Out8 = reinterpret_cast<UTF8 *>(&Res[0]);
 722   ConvertUTF32toUTF8(&In32, In32 + Codepoints.size(), &Out8, Out8 + Res.size(),
 723                      strictConversion);
 724   Res.resize(reinterpret_cast<char *>(Out8) - Res.data());
 725   return Res;
 726 }
 727
 728 static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
 729   OS << '\"';
 730   for (unsigned char C : S) {
 731     if (C == 0x22 || C == 0x5C)
 732       OS << '\\';
 733     if (C >= 0x20) {
 734       OS << C;
 735       continue;
 736     }
 737     OS << '\\';
 738     switch (C) {
 739     // A few characters are common enough to make short escapes worthwhile.
 740     case '\t':
 741       OS << 't';
 742       break;
 743     case '\n':
 744       OS << 'n';
 745       break;
 746     case '\r':
 747       OS << 'r';
 748       break;
 749     default:
 750       OS << 'u';
 751       llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4);
 752       break;
 753     }
 754   }
 755   OS << '\"';
 756 }
 757
 758 void llvm::json::OStream::value(const Value &V) {
 759   switch (V.kind()) {
 760   case Value::Null:
 761     valueBegin();
 762     OS << "null";
 763     return;
 764   case Value::Boolean:
 765     valueBegin();
 766     OS << (*V.getAsBoolean() ? "true" : "false");
 767     return;
 768   case Value::Number:
 769     valueBegin();
 770     if (V.Type == Value::T_Integer)
 771       OS << *V.getAsInteger();
 772     else if (V.Type == Value::T_UINT64)
 773       OS << *V.getAsUINT64();
 774     else
 775       OS << format("%.*g", std::numeric_limits<double>::max_digits10,
 776                    *V.getAsNumber());
 777     return;
 778   case Value::String:
 779     valueBegin();
 780     quote(OS, *V.getAsString());
 781     return;
 782   case Value::Array:
 783     return array([&] {
 784       for (const Value &E : *V.getAsArray())
 785         value(E);
 786     });
 787   case Value::Object:
 788     return object([&] {
 789       for (const Object::value_type *E : sortedElements(*V.getAsObject()))
 790         attribute(E->first, E->second);
 791     });
 792   }
 793 }
 794
 795 void llvm::json::OStream::valueBegin() {
 796   assert(Stack.back().Ctx != Object && "Only attributes allowed here");
 797   if (Stack.back().HasValue) {
 798     assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
 799     OS << ',';
 800   }
 801   if (Stack.back().Ctx == Array)
 802     newline();
 803   flushComment();
 804   Stack.back().HasValue = true;
 805 }
 806
 807 void OStream::comment(llvm::StringRef Comment) {
 808   assert(PendingComment.empty() && "Only one comment per value!");
 809   PendingComment = Comment;
 810 }
 811
 812 void OStream::flushComment() {
 813   if (PendingComment.empty())
 814     return;
 815   OS << (IndentSize ? "/* " : "/*");
 816   // Be sure not to accidentally emit "*/". Transform to "* /".
 817   while (!PendingComment.empty()) {
 818     auto Pos = PendingComment.find("*/");
 819     if (Pos == StringRef::npos) {
 820       OS << PendingComment;
 821       PendingComment = "";
 822     } else {
 823       OS << PendingComment.take_front(Pos) << "* /";
 824       PendingComment = PendingComment.drop_front(Pos + 2);
 825     }
 826   }
 827   OS << (IndentSize ? " */" : "*/");
 828   // Comments are on their own line unless attached to an attribute value.
 829   if (Stack.size() > 1 && Stack.back().Ctx == Singleton) {
 830     if (IndentSize)
 831       OS << ' ';
 832   } else {
 833     newline();
 834   }
 835 }
 836
 837 void llvm::json::OStream::newline() {
 838   if (IndentSize) {
 839     OS.write('\n');
 840     OS.indent(Indent);
 841   }
 842 }
 843
 844 void llvm::json::OStream::arrayBegin() {
 845   valueBegin();
 846   Stack.emplace_back();
 847   Stack.back().Ctx = Array;
 848   Indent += IndentSize;
 849   OS << '[';
 850 }
 851
 852 void llvm::json::OStream::arrayEnd() {
 853   assert(Stack.back().Ctx == Array);
 854   Indent -= IndentSize;
 855   if (Stack.back().HasValue)
 856     newline();
 857   OS << ']';
 858   assert(PendingComment.empty());
 859   Stack.pop_back();
 860   assert(!Stack.empty());
 861 }
 862
 863 void llvm::json::OStream::objectBegin() {
 864   valueBegin();
 865   Stack.emplace_back();
 866   Stack.back().Ctx = Object;
 867   Indent += IndentSize;
 868   OS << '{';
 869 }
 870
 871 void llvm::json::OStream::objectEnd() {
 872   assert(Stack.back().Ctx == Object);
 873   Indent -= IndentSize;
 874   if (Stack.back().HasValue)
 875     newline();
 876   OS << '}';
 877   assert(PendingComment.empty());
 878   Stack.pop_back();
 879   assert(!Stack.empty());
 880 }
 881
 882 void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
 883   assert(Stack.back().Ctx == Object);
 884   if (Stack.back().HasValue)
 885     OS << ',';
 886   newline();
 887   flushComment();
 888   Stack.back().HasValue = true;
 889   Stack.emplace_back();
 890   Stack.back().Ctx = Singleton;
 891   if (LLVM_LIKELY(isUTF8(Key))) {
 892     quote(OS, Key);
 893   } else {
 894     assert(false && "Invalid UTF-8 in attribute key");
 895     quote(OS, fixUTF8(Key));
 896   }
 897   OS.write(':');
 898   if (IndentSize)
 899     OS.write(' ');
 900 }
 901
 902 void llvm::json::OStream::attributeEnd() {
 903   assert(Stack.back().Ctx == Singleton);
 904   assert(Stack.back().HasValue && "Attribute must have a value");
 905   assert(PendingComment.empty());
 906   Stack.pop_back();
 907   assert(Stack.back().Ctx == Object);
 908 }
 909
 910 raw_ostream &llvm::json::OStream::rawValueBegin() {
 911   valueBegin();
 912   Stack.emplace_back();
 913   Stack.back().Ctx = RawValue;
 914   return OS;
 915 }
 916
 917 void llvm::json::OStream::rawValueEnd() {
 918   assert(Stack.back().Ctx == RawValue);
 919   Stack.pop_back();
 920 }
 921
 922 } // namespace json
 923 } // namespace llvm
 924
 925 void llvm::format_provider<llvm::json::Value>::format(
 926     const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
 927   unsigned IndentAmount = 0;
 928   if (!Options.empty() && Options.getAsInteger(/*Radix=*/10, IndentAmount))
 929     llvm_unreachable("json::Value format options should be an integer");
 930   json::OStream(OS, IndentAmount).value(E);
 931 }
 932