client/deps/tinycbor/cborpretty.c

   1 /****************************************************************************
   2 **
   3 ** Copyright (C) 2018 Intel Corporation
   4 **
   5 ** Permission is hereby granted, free of charge, to any person obtaining a copy
   6 ** of this software and associated documentation files (the "Software"), to deal
   7 ** in the Software without restriction, including without limitation the rights
   8 ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   9 ** copies of the Software, and to permit persons to whom the Software is
  10 ** furnished to do so, subject to the following conditions:
  11 **
  12 ** The above copyright notice and this permission notice shall be included in
  13 ** all copies or substantial portions of the Software.
  14 **
  15 ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21 ** THE SOFTWARE.
  22 **
  23 ****************************************************************************/
  24
  25 #define _BSD_SOURCE 1
  26 #define _DEFAULT_SOURCE 1
  27 #ifndef __STDC_LIMIT_MACROS
  28 #  define __STDC_LIMIT_MACROS 1
  29 #endif
  30
  31 #include "cbor.h"
  32 #include "cborinternal_p.h"
  33 #include "compilersupport_p.h"
  34 #include "utf8_p.h"
  35
  36 #include <inttypes.h>
  37 #include <string.h>
  38
  39 /**
  40  * \defgroup CborPretty Converting CBOR to text
  41  * \brief Group of functions used to convert CBOR to text form.
  42  *
  43  * This group contains two functions that can be used to convert a \ref
  44  * CborValue object to a text representation. This module attempts to follow
  45  * the recommendations from RFC 7049 section 6 "Diagnostic Notation", though it
  46  * has a few differences. They are noted below.
  47  *
  48  * TinyCBOR does not provide a way to convert from the text representation back
  49  * to encoded form. To produce a text form meant to be parsed, CborToJson is
  50  * recommended instead.
  51  *
  52  * Either of the functions in this section will attempt to convert exactly one
  53  * CborValue object to text. Those functions may return any error documented
  54  * for the functions for CborParsing. In addition, if the C standard library
  55  * stream functions return with error, the text conversion will return with
  56  * error CborErrorIO.
  57  *
  58  * These functions also perform UTF-8 validation in CBOR text strings. If they
  59  * encounter a sequence of bytes that is not permitted in UTF-8, they will return
  60  * CborErrorInvalidUtf8TextString. That includes encoding of surrogate points
  61  * in UTF-8.
  62  *
  63  * \warning The output type produced by these functions is not guaranteed to
  64  * remain stable. A future update of TinyCBOR may produce different output for
  65  * the same input and parsers may be unable to handle it.
  66  *
  67  * \sa CborParsing, CborToJson, cbor_parser_init()
  68  */
  69
  70 /**
  71  * \addtogroup CborPretty
  72  * @{
  73  * <h2 class="groupheader">Text format</h2>
  74  *
  75  * As described in RFC 7049 section 6 "Diagnostic Notation", the format is
  76  * largely borrowed from JSON, but modified to suit CBOR's different data
  77  * types. TinyCBOR makes further modifications to distinguish different, but
  78  * similar values.
  79  *
  80  * CBOR values are currently encoded as follows:
  81  * \par Integrals (unsigned and negative)
  82  *      Base-10 (decimal) text representation of the value
  83  * \par Byte strings:
  84  *      <tt>"h'"</tt> followed by the Base16 (hex) representation of the binary data, followed by an ending quote (')
  85  * \par Text strings:
  86  *      C-style escaped string in quotes, with C11/C++11 escaping of Unicode codepoints above U+007F.
  87  * \par Tags:
  88  *      Tag value, with the tagged value in parentheses. No special encoding of the tagged value is performed.
  89  * \par Simple types:
  90  *      <tt>"simple(nn)"</tt> where \c nn is the simple value
  91  * \par Null:
  92  *      \c null
  93  * \par Undefined:
  94  *      \c undefined
  95  * \par Booleans:
  96  *      \c true or \c false
  97  * \par Floating point:
  98  *      If NaN or infinite, the actual words \c NaN or \c infinite.
  99  *      Otherwise, the decimal representation with as many digits as necessary to ensure no loss of information.
 100  *      By default, float values are suffixed by "f" and half-float values suffixed by "f16" (doubles have no suffix).
 101  *      If the CborPrettyNumericEncodingIndicators flag is active, the values instead are encoded following the
 102  *      Section 6 recommended encoding indicators: float values are suffixed with "_2" and half-float with "_1".
 103  *      A decimal point is always present.
 104  * \par Arrays:
 105  *      Comma-separated list of elements, enclosed in square brackets ("[" and "]").
 106  * \par Maps:
 107  *      Comma-separated list of key-value pairs, with the key and value separated
 108  *      by a colon (":"), enclosed in curly braces ("{" and "}").
 109  *
 110  * The CborPrettyFlags enumerator contains flags to control some aspects of the
 111  * encoding:
 112  * \par String fragmentation
 113  *      When the CborPrettyShowStringFragments option is active, text and byte
 114  *      strings that are transmitted in fragments are shown instead inside
 115  *      parentheses ("(" and ")") with no preceding number and each fragment is
 116  *      displayed individually. If a tag precedes the string, then the output
 117  *      will contain a double set of parentheses. If the option is not active,
 118  *      the fragments are merged together and the display will not show any
 119  *      difference from a string transmitted with determinate length.
 120  * \par Encoding indicators
 121  *      Numbers and lengths in CBOR can be encoded in multiple representations.
 122  *      If the CborPrettyIndicateOverlongNumbers option is active, numbers
 123  *      and lengths that are transmitted in a longer encoding than necessary
 124  *      will be indicated, by appending an underscore ("_") to either the
 125  *      number or the opening bracket or brace, followed by a number
 126  *      indicating the CBOR additional information: 0 for 1 byte, 1 for 2
 127  *      bytes, 2 for 4 bytes and 3 for 8 bytes.
 128  *      If the CborPrettyIndicateIndeterminateLength option is active, maps,
 129  *      arrays and strings encoded with indeterminate length will be marked by
 130  *      an underscore after the opening bracket or brace or the string (if not
 131  *      showing fragments), without a number after it.
 132  */
 133
 134 /**
 135  * \enum CborPrettyFlags
 136  * The CborPrettyFlags enum contains flags that control the conversion of CBOR to text format.
 137  *
 138  * \value CborPrettyNumericEncodingIndicators   Use numeric encoding indicators instead of textual for float and half-float.
 139  * \value CborPrettyTextualEncodingIndicators   Use textual encoding indicators for float ("f") and half-float ("f16").
 140  * \value CborPrettyIndicateIndeterminateLength (default) Indicate when a map or array has indeterminate length.
 141  * \value CborPrettyIndicateOverlongNumbers     Indicate when a number or length was encoded with more bytes than needed.
 142  * \value CborPrettyShowStringFragments         If the byte or text string is transmitted in chunks, show each individually.
 143  * \value CborPrettyMergeStringFragment         Merge all chunked byte or text strings and display them in a single entry.
 144  * \value CborPrettyDefaultFlags                Default conversion flags.
 145  */
 146
 147 #ifndef CBOR_NO_FLOATING_POINT
 148 static inline bool convertToUint64(double v, uint64_t *absolute) {
 149     double supremum;
 150     v = fabs(v);
 151
 152     /* C11 standard section 6.3.1.4 "Real floating and integer" says:
 153      *
 154      *  1 When a finite value of real floating type is converted to an integer
 155      *    type other than _Bool, the fractional part is discarded (i.e., the
 156      *    value is truncated toward zero). If the value of the integral part
 157      *    cannot be represented by the integer type, the behavior is undefined.
 158      *
 159      * So we must perform a range check that v <= UINT64_MAX, but we can't use
 160      * UINT64_MAX + 1.0 because the standard continues:
 161      *
 162      *  2 When a value of integer type is converted to a real floating type, if
 163      *    the value being converted can be represented exactly in the new type,
 164      *    it is unchanged. If the value being converted is in the range of
 165      *    values that can be represented but cannot be represented exactly, the
 166      *    result is either the nearest higher or nearest lower representable
 167      *    value, chosen in an implementation-defined manner.
 168      */
 169     supremum = -2.0 * INT64_MIN;     /* -2 * (- 2^63) == 2^64 */
 170     if (v >= supremum)
 171         return false;
 172
 173     /* Now we can convert, these two conversions cannot be UB */
 174     *absolute = v;
 175     return *absolute == v;
 176 }
 177 #endif
 178
 179 static void printRecursionLimit(CborStreamFunction stream, void *out) {
 180     stream(out, "<nesting too deep, recursion stopped>");
 181 }
 182
 183 static CborError hexDump(CborStreamFunction stream, void *out, const void *ptr, size_t n) {
 184     const uint8_t *buffer = (const uint8_t *)ptr;
 185     CborError err = CborNoError;
 186     while (n-- && !err)
 187         err = stream(out, "%02" PRIx8, *buffer++);
 188
 189     return err;
 190 }
 191
 192 /* This function decodes buffer as UTF-8 and prints as escaped UTF-16.
 193  * On UTF-8 decoding error, it returns CborErrorInvalidUtf8TextString */
 194 static CborError utf8EscapedDump(CborStreamFunction stream, void *out, const void *ptr, size_t n) {
 195     const uint8_t *buffer = (const uint8_t *)ptr;
 196     const uint8_t *const end = buffer + n;
 197     CborError err = CborNoError;
 198
 199     while (buffer < end && !err) {
 200         uint32_t uc = get_utf8(&buffer, end);
 201         if (uc == ~0U)
 202             return CborErrorInvalidUtf8TextString;
 203
 204         if (uc < 0x80) {
 205             /* single-byte UTF-8 */
 206             unsigned char escaped = (unsigned char)uc;
 207             if (uc < 0x7f && uc >= 0x20 && uc != '\\' && uc != '"') {
 208                 err = stream(out, "%c", (char)uc);
 209                 continue;
 210             }
 211
 212             /* print as an escape sequence */
 213             switch (uc) {
 214                 case '"':
 215                 case '\\':
 216                     break;
 217                 case '\b':
 218                     escaped = 'b';
 219                     break;
 220                 case '\f':
 221                     escaped = 'f';
 222                     break;
 223                 case '\n':
 224                     escaped = 'n';
 225                     break;
 226                 case '\r':
 227                     escaped = 'r';
 228                     break;
 229                 case '\t':
 230                     escaped = 't';
 231                     break;
 232                 default:
 233                     goto print_utf16;
 234             }
 235             err = stream(out, "\\%c", escaped);
 236             continue;
 237         }
 238
 239         /* now print the sequence */
 240         if (uc > 0xffffU) {
 241             /* needs surrogate pairs */
 242             err = stream(out, "\\u%04" PRIX32 "\\u%04" PRIX32,
 243                          (uc >> 10) + 0xd7c0,    /* high surrogate */
 244                          (uc % 0x0400) + 0xdc00);
 245         } else {
 246 print_utf16:
 247             /* no surrogate pair needed */
 248             err = stream(out, "\\u%04" PRIX32, uc);
 249         }
 250     }
 251     return err;
 252 }
 253
 254 static const char *resolve_indicator(const uint8_t *ptr, const uint8_t *end, int flags) {
 255     static const char indicators[8][3] = {
 256         "_0", "_1", "_2", "_3",
 257         "", "", "",             /* these are not possible */
 258         "_"
 259     };
 260     const char *no_indicator = indicators[5];   /* empty string */
 261     uint8_t additional_information;
 262     uint8_t expected_information;
 263     uint64_t value;
 264     CborError err;
 265
 266     if (ptr == end)
 267         return NULL;    /* CborErrorUnexpectedEOF */
 268
 269     additional_information = (*ptr & SmallValueMask);
 270     if (additional_information < Value8Bit)
 271         return no_indicator;
 272
 273     /* determine whether to show anything */
 274     if ((flags & CborPrettyIndicateIndeterminateLength) &&
 275             additional_information == IndefiniteLength)
 276         return indicators[IndefiniteLength - Value8Bit];
 277     if ((flags & CborPrettyIndicateOverlongNumbers) == 0)
 278         return no_indicator;
 279
 280     err = _cbor_value_extract_number(&ptr, end, &value);
 281     if (err)
 282         return NULL;    /* CborErrorUnexpectedEOF */
 283
 284     expected_information = Value8Bit - 1;
 285     if (value >= Value8Bit)
 286         ++expected_information;
 287     if (value > 0xffU)
 288         ++expected_information;
 289     if (value > 0xffffU)
 290         ++expected_information;
 291     if (value > 0xffffffffU)
 292         ++expected_information;
 293     return expected_information == additional_information ?
 294            no_indicator :
 295            indicators[additional_information - Value8Bit];
 296 }
 297
 298 static const char *get_indicator(const CborValue *it, int flags) {
 299     return resolve_indicator(it->ptr, it->parser->end, flags);
 300 }
 301
 302 static CborError value_to_pretty(CborStreamFunction stream, void *out, CborValue *it, int flags, int recursionsLeft);
 303 static CborError container_to_pretty(CborStreamFunction stream, void *out, CborValue *it, CborType containerType,
 304                                      int flags, int recursionsLeft) {
 305     const char *comma = "";
 306     CborError err = CborNoError;
 307
 308     if (!recursionsLeft) {
 309         printRecursionLimit(stream, out);
 310         return err;     /* do allow the dumping to continue */
 311     }
 312
 313     while (!cbor_value_at_end(it) && !err) {
 314         err = stream(out, "%s", comma);
 315         comma = ", ";
 316
 317         if (!err)
 318             err = value_to_pretty(stream, out, it, flags, recursionsLeft);
 319
 320         if (containerType == CborArrayType)
 321             continue;
 322
 323         /* map: that was the key, so get the value */
 324         if (!err)
 325             err = stream(out, ": ");
 326         if (!err)
 327             err = value_to_pretty(stream, out, it, flags, recursionsLeft);
 328     }
 329     return err;
 330 }
 331
 332 static CborError value_to_pretty(CborStreamFunction stream, void *out, CborValue *it, int flags, int recursionsLeft) {
 333     CborError err = CborNoError;
 334     CborType type = cbor_value_get_type(it);
 335     switch (type) {
 336         case CborArrayType:
 337         case CborMapType: {
 338             /* recursive type */
 339             CborValue recursed;
 340             const char *indicator = get_indicator(it, flags);
 341             const char *space = *indicator ? " " : indicator;
 342
 343             err = stream(out, "%c%s%s", type == CborArrayType ? '[' : '{', indicator, space);
 344             if (err)
 345                 return err;
 346
 347             err = cbor_value_enter_container(it, &recursed);
 348             if (err) {
 349                 it->ptr = recursed.ptr;
 350                 return err;       /* parse error */
 351             }
 352             err = container_to_pretty(stream, out, &recursed, type, flags, recursionsLeft - 1);
 353             if (err) {
 354                 it->ptr = recursed.ptr;
 355                 return err;       /* parse error */
 356             }
 357             err = cbor_value_leave_container(it, &recursed);
 358             if (err)
 359                 return err;       /* parse error */
 360
 361             return stream(out, type == CborArrayType ? "]" : "}");
 362         }
 363
 364         case CborIntegerType: {
 365             uint64_t val;
 366             cbor_value_get_raw_integer(it, &val);    /* can't fail */
 367
 368             if (cbor_value_is_unsigned_integer(it)) {
 369                 err = stream(out, "%" PRIu64, val);
 370             } else {
 371                 /* CBOR stores the negative number X as -1 - X
 372                  * (that is, -1 is stored as 0, -2 as 1 and so forth) */
 373                 if (++val) {                /* unsigned overflow may happen */
 374                     err = stream(out, "-%" PRIu64, val);
 375                 } else {
 376                     /* overflown
 377                      *   0xffff`ffff`ffff`ffff + 1 =
 378                      * 0x1`0000`0000`0000`0000 = 18446744073709551616 (2^64) */
 379                     err = stream(out, "-18446744073709551616");
 380                 }
 381             }
 382             if (!err)
 383                 err = stream(out, "%s", get_indicator(it, flags));
 384             break;
 385         }
 386
 387         case CborByteStringType:
 388         case CborTextStringType: {
 389             size_t n = 0;
 390             const void *ptr;
 391             bool showingFragments = (flags & CborPrettyShowStringFragments) && !cbor_value_is_length_known(it);
 392             const char *separator = "";
 393             char close = '\'';
 394             char open[3] = "h'";
 395             const char *indicator = NULL;
 396
 397             if (type == CborTextStringType) {
 398                 close = open[0] = '"';
 399                 open[1] = '\0';
 400             }
 401
 402             if (showingFragments) {
 403                 err = stream(out, "(_ ");
 404                 if (!err)
 405                     err = _cbor_value_prepare_string_iteration(it);
 406             } else {
 407                 err = stream(out, "%s", open);
 408             }
 409
 410             while (!err) {
 411                 if (showingFragments || indicator == NULL) {
 412                     /* any iteration, except the second for a non-chunked string */
 413                     indicator = resolve_indicator(it->ptr, it->parser->end, flags);
 414                 }
 415
 416                 err = _cbor_value_get_string_chunk(it, &ptr, &n, it);
 417                 if (!ptr)
 418                     break;
 419
 420                 if (!err && showingFragments)
 421                     err = stream(out, "%s%s", separator, open);
 422                 if (!err)
 423                     err = (type == CborByteStringType ?
 424                            hexDump(stream, out, ptr, n) :
 425                            utf8EscapedDump(stream, out, ptr, n));
 426                 if (!err && showingFragments) {
 427                     err = stream(out, "%c%s", close, indicator);
 428                     separator = ", ";
 429                 }
 430             }
 431
 432             if (!err) {
 433                 if (showingFragments)
 434                     err = stream(out, ")");
 435                 else
 436                     err = stream(out, "%c%s", close, indicator);
 437             }
 438             return err;
 439         }
 440
 441         case CborTagType: {
 442             CborTag tag;
 443             cbor_value_get_tag(it, &tag);       /* can't fail */
 444             err = stream(out, "%" PRIu64 "%s(", tag, get_indicator(it, flags));
 445             if (!err)
 446                 err = cbor_value_advance_fixed(it);
 447             if (!err && recursionsLeft)
 448                 err = value_to_pretty(stream, out, it, flags, recursionsLeft - 1);
 449             else if (!err)
 450                 printRecursionLimit(stream, out);
 451             if (!err)
 452                 err = stream(out, ")");
 453             return err;
 454         }
 455
 456         case CborSimpleType: {
 457             /* simple types can't fail and can't have overlong encoding */
 458             uint8_t simple_type;
 459             cbor_value_get_simple_type(it, &simple_type);
 460             err = stream(out, "simple(%" PRIu8 ")", simple_type);
 461             break;
 462         }
 463
 464         case CborNullType:
 465             err = stream(out, "null");
 466             break;
 467
 468         case CborUndefinedType:
 469             err = stream(out, "undefined");
 470             break;
 471
 472         case CborBooleanType: {
 473             bool val;
 474             cbor_value_get_boolean(it, &val);       /* can't fail */
 475             err = stream(out, val ? "true" : "false");
 476             break;
 477         }
 478
 479 #ifndef CBOR_NO_FLOATING_POINT
 480         case CborDoubleType: {
 481             const char *suffix;
 482             double val;
 483             int r;
 484             uint64_t ival;
 485
 486             if (false) {
 487                 float f;
 488                 case CborFloatType:
 489                     cbor_value_get_float(it, &f);
 490                     val = f;
 491                     suffix = (flags & CborPrettyNumericEncodingIndicators) ? "_2" : "f";
 492                 } else if (false) {
 493                     uint16_t f16;
 494                 case CborHalfFloatType:
 495 #ifndef CBOR_NO_HALF_FLOAT_TYPE
 496                     cbor_value_get_half_float(it, &f16);
 497                     val = decode_half(f16);
 498                     suffix = (flags & CborPrettyNumericEncodingIndicators) ? "_1" : "f16";
 499 #else
 500                     (void)f16;
 501                     err = CborErrorUnsupportedType;
 502                     break;
 503 #endif
 504                 } else {
 505                     cbor_value_get_double(it, &val);
 506                     suffix = "";
 507                 }
 508
 509                 if ((flags & CborPrettyNumericEncodingIndicators) == 0) {
 510                     r = fpclassify(val);
 511                     if (r == FP_NAN || r == FP_INFINITE)
 512                         suffix = "";
 513                 }
 514
 515                 if (convertToUint64(val, &ival)) {
 516                     /* this double value fits in a 64-bit integer, so show it as such
 517                      * (followed by a floating point suffix, to disambiguate) */
 518                     err = stream(out, "%s%" PRIu64 ".%s", val < 0 ? "-" : "", ival, suffix);
 519                 } else {
 520                     /* this number is definitely not a 64-bit integer */
 521                     err = stream(out, "%." DBL_DECIMAL_DIG_STR "g%s", val, suffix);
 522                 }
 523                 break;
 524             }
 525 #else
 526         case CborDoubleType:
 527         case CborFloatType:
 528         case CborHalfFloatType:
 529             err = CborErrorUnsupportedType;
 530             break;
 531 #endif /* !CBOR_NO_FLOATING_POINT */
 532
 533         case CborInvalidType:
 534             err = stream(out, "invalid");
 535             if (err)
 536                 return err;
 537             return CborErrorUnknownType;
 538     }
 539
 540     if (!err)
 541         err = cbor_value_advance_fixed(it);
 542     return err;
 543 }
 544
 545 /**
 546  * Converts the current CBOR type pointed by \a value to its textual
 547  * representation and writes it to the stream by calling the \a streamFunction.
 548  * If an error occurs, this function returns an error code similar to
 549  * \ref CborParsing.
 550  *
 551  * The textual representation can be controlled by the \a flags parameter (see
 552  * \ref CborPrettyFlags for more information).
 553  *
 554  * If no error ocurred, this function advances \a value to the next element.
 555  * Often, concatenating the text representation of multiple elements can be
 556  * done by appending a comma to the output stream in between calls to this
 557  * function.
 558  *
 559  * The \a streamFunction function will be called with the \a token value as the
 560  * first parameter and a printf-style format string as the second, with a variable
 561  * number of further parameters.
 562  *
 563  * \sa cbor_value_to_pretty(), cbor_value_to_json_advance()
 564  */
 565 CborError cbor_value_to_pretty_stream(CborStreamFunction streamFunction, void *token, CborValue *value, int flags) {
 566     return value_to_pretty(streamFunction, token, value, flags, CBOR_PARSER_MAX_RECURSIONS);
 567 }
 568
 569 /** @} */